In [1]:
%load_ext catalyst

In [None]:
# Setup matplotlib to display graphs inline in this Notebook
%matplotlib inline

In [2]:
# Cointegration approach using the Johansen test. (You can also try the traditional two-step cointegration test.)

import itertools
from datetime import timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import coint

from catalyst import run_algorithm
from catalyst.api import (symbols, order, symbol, order_target_percent, order_percent)
from catalyst.exchange.utils.exchange_utils import get_exchange_symbols


def initialize(context):
    """Attach counters, exchange info and strategy parameters to ``context``.

    The first entry of ``context.exchanges`` supplies the exchange name and
    quote currency (both lower-cased). Every other value is a fixed strategy
    setting that ``handle_data`` reads back from ``context``.
    """
    # --- run-time bookkeeping -------------------------------------------
    context.i = -1               # minute bar counter
    context.tp_count = -1        # bar index inside the current trading period
    context.pairs_universe = None

    # --- exchange ---------------------------------------------------------
    first_exchange = list(context.exchanges.values())[0]
    context.exchange = first_exchange.name.lower()  # bitfinex
    context.quote_currency = first_exchange.quote_currency.lower()

    # --- strategy windows -------------------------------------------------
    # A matrix of universe close prices is built for each FORMATION_PERIOD
    # and then traded for the following TRADING_PERIOD.
    context.price = 'close'          # price field used in all computations
    context.formation_period = 100   # in bars
    context.trading_period = 10      # in bars
    bar_minutes = 60
    context.minutes = bar_minutes
    context.frequency = '{}T'.format(bar_minutes)

    # --- universe ---------------------------------------------------------
    context.number_of_coins = 10     # top coins by volume kept in the universe
    context.number_of_pairs = 5      # coin pairs formed per trading period
    context.explicit_coins = [
        'btc_usd', 'eth_usd', 'xrp_usd', 'ltc_usd', 'eos_usd', 'iot_usd',
        'neo_usd', 'etc_usd', 'zec_usd', 'dsh_usd', 'xmr_usd', 'xlm_usd',
        'trx_usd',
    ]

    # --- entry / exit thresholds, in multiples of the spread std ---------
    context.std_open_mult = 2
    context.std_sl_mult = 3
    context.std_close_mult = 1

    context.set_commission(maker=0, taker=0)

def handle_data(context, data):
    """Per-bar callback implementing the pairs-trading strategy.

    ``context.i`` is incremented on every call and drives four phases:

    1. Warm-up: nothing happens until ``formation_period * minutes`` calls
       have elapsed.
    2. Formation: whenever ``context.i`` is a multiple of
       ``trading_period * minutes`` the coin universe is rebuilt from daily
       volume, all open positions are flattened, coins whose ADF test rejects
       a unit root are dropped, log-price spreads are computed for every
       remaining coin combination, and the ``number_of_pairs`` pairs with the
       smallest spread standard deviation become ``context.pairs_universe``.
    3. Trading: every ``context.minutes`` calls the current log-spread of each
       selected pair is recorded and the position / ban state machine
       (open, take profit, stop-loss, unban) is advanced.
    4. Orders: when the position row changed versus the previous bar, orders
       are placed to open the new legs or to reverse the recorded opening
       orders.

    One call after the last bar of a trading period ``context.pairs_universe``
    is reset to ``None``.

    NOTE(review): ``now``, ``past``, ``yesterday_close`` and ``ols_fitted`` are
    assigned but not read again in this function.
    NOTE(review): the residual ADF filter only prunes ``ols_results`` (which is
    merely displayed); pair selection uses ``pairs_spread_std`` for *all*
    combinations, so pairs that fail the filter can still be traded -- confirm
    whether that is intended.
    NOTE(review): the OLS regression uses raw ``close`` prices while the traded
    spread uses ``close_log`` -- confirm.
    NOTE(review): ``display`` is not imported in this file; presumably it is
    provided by the IPython/Jupyter session.
    """
    # Minute counter.
    context.i += 1
    # FORMATION LOGIC

    # Skip the first FORMATION_PERIOD bars (expressed in calls).
    if context.i < context.formation_period*context.minutes:
        return
    # At the beginning of every trading period: rebuild the coin universe and
    # flatten whatever is still open from the previous period.
    if context.i % (context.trading_period*context.minutes) == 0:
        now = data.current_dt
        past = now - timedelta(minutes=context.formation_period*context.minutes)
        context.coins = symbols(*context.explicit_coins)  # convert all the pairs to the asset objects that data.history requires
        # To simplify computation, reduce the universe to the coins with the highest daily volume.
        # [0:1] keeps the first of the two daily bars (presumably the last completed day).
        yesterday_volume = data.history(context.coins,'volume',bar_count=2,frequency='1D')[0:1].T # volume is in BASE currency
        yesterday_close = data.history(context.coins,'close',bar_count=2,frequency='1D')[0:1].T
#         non_stable = abs(yesterday_close[yesterday_close.columns[0]]-1) > 0.3 # we use condition to eliminate stable coins
#         yesterday_volume = yesterday_volume[non_stable]*yesterday_close[non_stable] #we convert volume into quote currency terms
        yesterday_volume = yesterday_volume.nlargest(context.number_of_coins, columns=yesterday_volume.columns[0]) # keep only the most traded coins
        context.coins_reduced = list(yesterday_volume.index) # index entries are the asset objects themselves
        # Close positions if any are open.
        for coin in context.portfolio.positions:
            order_target_percent(coin, 0)
#             print(coin.symbol, ':', context.portfolio.positions[coin].amount, 'sold out')
        display(context.portfolio.portfolio_value)
    # Same condition as above: pair formation for the new trading period.
    if context.i % (context.trading_period*context.minutes) == 0:  
        # 1) take close prices over the formation period
        close = data.history(context.coins_reduced,'close',bar_count=context.formation_period,frequency=context.frequency)
        # ADF test on each price series
        for coin in close.columns:
            results = adfuller(close[coin].values, autolag='BIC')
            # results[1] is compared against 0.05: drop the coin if the unit-root null is rejected
            if results[1] < 0.05:
                print(coin, 'is not unit-root, excluding...')
                close = close.drop(coin, axis=1)
        close_log = close.apply(np.log)
        # 3) build every possible pair spread
        pairs_spread = pd.DataFrame(index=close_log.index)
        pairs_spread_std = pd.DataFrame(index=['std']) # per-pair spread std, used below as the distance measure
        ols_results = pd.DataFrame(index=['ols_results'])
        residuals = pd.DataFrame()
        adf_test = pd.DataFrame()
        for i,j in itertools.combinations(close_log.columns,2): 
            # Regress coin i on coin j (raw prices) and ADF-test the residuals.
            ols_results[(i,j)] = sm.OLS(close[i], sm.add_constant(close[j])).fit()
#             print(sm.add_constant(close_log[j]))
            residuals[(i,j)] = ols_results[(i,j)].iloc[0].resid
            adf_test[(i,j)] = adfuller(residuals[(i,j)], autolag='BIC')
            # Demeaned log-price spread and its standard deviation.
            pairs_spread[(i,j)] = close_log[i] - close_log[j] 
            pairs_spread[(i,j)] = pairs_spread[(i,j)] - pairs_spread[(i,j)].mean()
            pairs_spread_std[(i,j)] = pairs_spread[(i,j)].std()
        # Drop OLS fits whose second ADF entry (compared against 0.05) is too large.
        for pair in adf_test.columns:
            if adf_test[pair].iloc[1] > 0.05:
                ols_results.drop(pair, axis=1, inplace=True)
#         parameters = 
        ols_params = list(x.params for x in ols_results.loc['ols_results'].values)
        ols_fitted = list(x.fittedvalues for x in ols_results.loc['ols_results'].values)
        display(ols_params)

        # 5) reduce the universe to the NUMBER_OF_PAIRS pairs with the lowest distance measure:
        context.pairs_universe = list(pairs_spread_std.sort_values(by='std', axis=1).columns[: context.number_of_pairs])
#         display(context.i, pairs_spread_std, pairs_spread, pairs_spread_std.sort_values(by='std', axis=1))
        # 6) create an empty dataset for the upcoming trading data (one row per trading bar)
        context.dataset = pd.DataFrame(index=range(context.trading_period), columns=context.pairs_universe) 
        # 7) store each pair's spread std and mean in extra 'std' / 'mean' rows for the trading logic
        for pair in context.pairs_universe:
            context.dataset.loc['std', pair] = pairs_spread[pair].std()
            context.dataset.loc['mean', pair] = (close_log[pair[0]] - close_log[pair[1]]).mean()
#             plt.figure()
#             plt.plot(pairs_spread[pair])
#             plt.axhline(y=context.dataset[pair].loc['std'], color='r', linestyle='-')
#             plt.show()
#         display(context.dataset)
        # Position per pair and bar: 1 = long spread, -1 = short spread, 0 = flat.
        context.pos = pd.DataFrame(index = range(context.trading_period), columns=context.pairs_universe)
        context.pos.iloc[0] = 0
        # Ban per pair and bar: 'long' / 'short' after a stop-loss, NaN otherwise.
        context.pos_ban = pd.DataFrame(index = range(context.trading_period), columns=context.pairs_universe)
        # Order ids of the two opening legs (rows 0 and 1), used to close them later.
        context.transactions_open = pd.DataFrame(index=(0,1), columns=context.pairs_universe)
#         display(np.isnan(context.pos_ban.iloc[0,0]))



    # TRADING LOGIC

    if context.i % context.minutes == 0 and context.pairs_universe: # trade every CONTEXT.MINUTES calls and only while pairs_universe is non-empty
        context.tp_count = int(context.i%(context.trading_period*context.minutes)/context.minutes) # goes from 0 to (context.trading_period - 1)
#         display(context.tp_count)
        for pair in context.dataset.columns: # here pair contains TWO coins
            current_price = data.current(pair, context.price)
            current_price_log = current_price.apply(np.log)
            context.dataset.loc[context.tp_count, pair] = current_price_log[0] - current_price_log[1] - context.dataset[pair].loc['mean'] # current log-spread, demeaned with the formation-period mean
#             display(context.std_open_mult*context.dataset[pair].loc[context.tp_count], 'std: ', context.dataset[pair].loc['std'])
            # Bar 0 is the bar on which pairs_universe was formed; no trading on it.
            if context.tp_count>0:
                # Carry the previous bar's position and ban forward explicitly.
                context.pos.loc[context.tp_count, [pair]] = context.pos.loc[context.tp_count-1, [pair]]
                context.pos_ban.loc[context.tp_count, [pair]] = context.pos_ban.loc[context.tp_count-1, [pair]]
                # Then adjust the current state if the conditions below are met.

                # If BANNED: lift the ban once the spread is back inside the opening band.
                if type(context.pos_ban[pair].loc[context.tp_count])==str:
                    if (context.pos_ban[pair].loc[context.tp_count]=='short' and 
                           context.dataset[pair].loc[context.tp_count] < context.std_open_mult*context.dataset[pair].loc['std']):
                        context.pos_ban.loc[context.tp_count, pair] = np.nan
                        print(context.i, 'unban of short: {}, {}'.format(pair[0], pair[1]))
                    elif (context.pos_ban[pair].loc[context.tp_count]=='long' and 
                           context.dataset[pair].loc[context.tp_count] > -context.std_open_mult*context.dataset[pair].loc['std']):
                        context.pos_ban.loc[context.tp_count, pair] = np.nan
                        print(context.i, 'unban of long: {}, {}'.format(pair[0], pair[1]))

                # If NO POSITION (and no ban): open when the spread leaves the opening band.
                if context.pos[pair].loc[context.tp_count] == 0 and pd.isnull(context.pos_ban[pair].loc[context.tp_count]):
                    # SHORT SPREAD
                    if (context.dataset[pair].loc[context.tp_count] > context.std_open_mult*context.dataset[pair].loc['std']):
                        print(context.i, 'short {} long {}'.format(pair[0], pair[1]))
                        context.pos.loc[context.tp_count, pair] = -1
                    # LONG SPREAD
                    elif (context.dataset[pair].loc[context.tp_count] < -context.std_open_mult*context.dataset[pair].loc['std']):
                        print(context.i, 'long {} short {}'.format(pair[0], pair[1]))
                        context.pos.loc[context.tp_count, pair] = 1
                # If SHORT POSITION (this also runs on the bar where the short was just opened):
                if context.pos[pair].loc[context.tp_count] == -1:
                    # TAKE PROFIT SHORT
                    if context.dataset[pair].loc[context.tp_count] < context.std_close_mult*context.dataset[pair].loc['std']:
                        print(context.i, 'take profit of short: {}, {}'.format(pair[0], pair[1])) 
                        context.pos.loc[context.tp_count, pair] = 0
                    # STOP-LOSS SHORT: flatten and ban further shorts on this pair
                    elif context.dataset[pair].loc[context.tp_count] > context.std_sl_mult*context.dataset[pair].loc['std']:
                        print(context.i, 'stop-loss of short: {}, {}'.format(pair[0], pair[1])) 
                        context.pos.loc[context.tp_count, pair] = 0
                        context.pos_ban.loc[context.tp_count,pair] = 'short'
                # If LONG POSITION (this also runs on the bar where the long was just opened):
                if context.pos[pair].loc[context.tp_count] == 1:
                    # TAKE PROFIT LONG
                    if context.dataset[pair].loc[context.tp_count] > -context.std_close_mult*context.dataset[pair].loc['std']:
                        print(context.i, 'take profit of long: {}, {}'.format(pair[0], pair[1])) 
                        context.pos.loc[context.tp_count,pair] = 0
                    # STOP-LOSS LONG: flatten and ban further longs on this pair
                    elif context.dataset[pair].loc[context.tp_count] < -context.std_sl_mult*context.dataset[pair].loc['std']:
                        print(context.i, 'stop-loss of long: {}, {}'.format(pair[0], pair[1])) 
                        context.pos.loc[context.tp_count, pair] = 0
                        context.pos_ban.loc[context.tp_count, pair] = 'long'
#             print(context.i,'\nFirst pair is:', pair[0], '\nSecond pair is:', pair[1])
#         display(context.tp_count, context.i, current_price, current_price_log, context.dataset)

#         display(context.tp_count, context.i, context.pos, context.pos_ban, context.dataset)

    # ORDER LOGIC
    # Only act on trading bars where the position row differs from the previous bar.
    if (context.tp_count>0 and 
            not context.pos.loc[context.tp_count].equals(context.pos.loc[context.tp_count-1]) and 
                context.i % context.minutes == 0):
        # Difference between current positions and previous ones.
        difference = context.pos.loc[context.tp_count] - context.pos.loc[context.tp_count-1]
        # Keep only the pairs whose position changed.
        difference = difference[difference!=0]
        for pair in difference.index:
            # No opening order recorded for this pair: this change opens a position.
            if pd.isnull(context.transactions_open[pair].loc[0]):
                # Record the id of each opening order; each leg gets 1/number_of_pairs
                # of the portfolio, with opposite signs for the two coins.
                context.transactions_open.loc[0,pair]  = order_percent(pair[0], (1/context.number_of_pairs)*difference[pair])
                context.transactions_open.loc[1,pair]  = order_percent(pair[1], -(1/context.number_of_pairs)*difference[pair])
            else:
                # Close the open position by ordering the negative of each opening order's amount.
                order(pair[0], -float(context.blotter.orders[context.transactions_open[pair].loc[0]].amount))
                order(pair[1], -float(context.blotter.orders[context.transactions_open[pair].loc[1]].amount))
                context.transactions_open[pair] = np.nan

#             print(context.transactions_open[pair])
#         display(context.i, data.current_dt, context.portfolio.positions)
    # End trading at the end of the trading period: one call after its last
    # bar, clear pairs_universe so the trading block above stops running.
    if context.tp_count == context.trading_period-1 and (context.i % context.minutes) - 1 == 0 and context.pairs_universe:
        context.pairs_universe = None
#         print(context.i, 'End of trading period')
#         display(context.tp_count, context.i, context.pos, context.pos_ban, context.dataset)
#         display(context.blotter.orders)
    
def analyze(context=None, results=None):
    """Hook passed to ``run_algorithm`` as ``analyze``; intentionally a no-op."""
    return None


def universe_pt(context, now, past):
    """Return the tradable universe for an exchange and quote-currency market.

    Example: the Poloniex BTC market.

    All symbols known for ``context.exchange`` are fetched, then reduced to
    those that

    * are quoted in ``context.quote_currency`` (the part of the symbol after
      the ``_``),
    * have ``pd.Timestamp`` values in both ``start_date`` and ``end_minute``,
    * satisfy ``start_date < past`` and ``end_minute > now``.

    Parameters
    ----------
    context : object exposing ``exchange`` and ``quote_currency`` attributes.
    now, past : timestamps compared against ``end_minute`` / ``start_date``.

    Returns
    -------
    list of str
        The matching symbols; an empty list when nothing matches or the
        exchange reports no symbols.
    """
    # Get all the pairs for the given exchange.
    json_symbols = get_exchange_symbols(context.exchange)
    df_universe = pd.DataFrame.from_dict(json_symbols).transpose()
    if df_universe.empty:
        return []

    # Split 'base_quote' symbols with the vectorised string accessor; a symbol
    # without '_' yields NaN for the quote part and is filtered out below
    # instead of raising IndexError.
    symbol_parts = df_universe['symbol'].str.split('_')
    df_universe['quote_currency'] = symbol_parts.str[1]
    df_universe['base_currency'] = symbol_parts.str[0]

    # Keep only the pairs quoted in the requested currency.
    df_universe = df_universe[df_universe['quote_currency'] == context.quote_currency]

    # Keep only rows whose date fields are real timestamps. ``pd.Timestamp`` is
    # used because ``pd.tslib`` is not available in current pandas releases.
    # ``astype(bool)`` guarantees a boolean mask even for an empty frame.
    def is_timestamp(value):
        return isinstance(value, pd.Timestamp)

    has_dates = (df_universe['end_minute'].map(is_timestamp).astype(bool)
                 & df_universe['start_date'].map(is_timestamp).astype(bool))
    df_universe = df_universe[has_dates]

    # The pair must have started before ``past`` and still have data after ``now``.
    in_window = (df_universe['end_minute'] > now) & (df_universe['start_date'] < past)
    return df_universe.loc[in_window, 'symbol'].tolist()

def fill(series):
    """Replace NaN and +/-inf entries with the nearest valid value.

    Infinite values are first turned into NaN, then gaps are forward-filled
    and any remaining leading gaps are back-filled. A ``pd.Series`` yields a
    ``pd.Series``, a ``np.ndarray`` yields a ``np.ndarray``, and any other
    input is returned unchanged.
    """
    def _nearest_filled(values):
        without_inf = values.replace([np.inf, -np.inf], np.nan)
        return without_inf.ffill().bfill()

    if isinstance(series, pd.Series):
        return _nearest_filled(series)
    if isinstance(series, np.ndarray):
        return _nearest_filled(pd.Series(series)).values
    return series

def get_johansen(y, p):
    """
    Get the cointegration vectors at 95% level of significance
    given by the trace statistic test.

    Parameters
    ----------
    y : 2-D array-like with a ``shape`` of (observations, series).
    p : passed to ``coint_johansen`` as its third argument (number of lagged
        differences).

    Returns
    -------
    The object returned by ``coint_johansen`` with two extra attributes:
    ``r`` -- one plus the highest index whose trace statistic exceeds its 95%
    critical value (0 when none does) -- and ``evecr``, the first ``r``
    columns of ``evec``.
    """
    # Imported here rather than at file level so that an installation whose
    # statsmodels lacks this module only fails when the function is used.
    from statsmodels.tsa.vector_ar.vecm import coint_johansen

    _, n_series = y.shape
    jres = coint_johansen(y, 0, p)
    trace_stats = jres.lr1          # trace statistic
    critical_values = jres.cvt      # critical values

    # Start from rank 0 so that ``r`` is defined even when no trace statistic
    # exceeds its critical value.
    rank = 0
    for i in range(n_series):
        if trace_stats[i] > critical_values[i, 1]:     # 0: 90%  1: 95%  2: 99%
            rank = i + 1
    jres.r = rank
    jres.evecr = jres.evec[:, :rank]

    return jres

if __name__ == '__main__':
    # Backtest window; both bounds are parsed as UTC timestamps.
    start_date = pd.to_datetime('2018-05-04', utc=True)
    end_date = pd.to_datetime('2018-05-10', utc=True)

    # Exchange and run-mode settings, gathered in one place for readability.
    run_settings = dict(
        exchange_name='bitfinex',
        quote_currency='usd',
        data_frequency='minute',
        capital_base=100.0,  # amount of quote_currency
        live=False,
        live_graph=False,
        algo_namespace='simple_universe',
    )

    # Run the strategy; the result is kept for the cells that follow.
    performance = run_algorithm(
        start=start_date,
        end=end_date,
        initialize=initialize,
        handle_data=handle_data,
        analyze=analyze,
        **run_settings
    )

[2019-05-27 16:14:22.417888] INFO: run_algo: Catalyst version 0.5.21
[2019-05-27 16:14:25.421879] INFO: run_algo: running algo in backtest mode
[2019-05-27 16:14:25.783300] INFO: exchange_algorithm: initialized trading algorithm in backtest mode


100.0

[const                          -573.763509
 TradingPair(98871 [btc_usd])      0.140775
 dtype: float64, const                            86.241320
 TradingPair(125865 [neo_usd])     8.380817
 dtype: float64, const                             97.409346
 TradingPair(587847 [xrp_usd])    786.274308
 dtype: float64, const                             209.698659
 TradingPair(891235 [xlm_usd])    1372.861513
 dtype: float64, const                            4775.391253
 TradingPair(587847 [xrp_usd])    5576.509252
 dtype: float64, const                            13.454463
 TradingPair(587847 [xrp_usd])    79.903686
 dtype: float64, const                            21.068877
 TradingPair(652614 [iot_usd])    26.569345
 dtype: float64, const                             14.738329
 TradingPair(891235 [xlm_usd])    163.792291
 dtype: float64, const                            0.120795
 TradingPair(891235 [xlm_usd])    1.798814
 dtype: float64, const                            0.101695
 TradingPai

6240 long TradingPair(125865 [neo_usd]) short TradingPair(652614 [iot_usd])
6360 stop-loss of long: TradingPair(125865 [neo_usd]), TradingPair(652614 [iot_usd])


98.22963168994825

[const                          -495.315581
 TradingPair(98871 [btc_usd])      0.132808
 dtype: float64, const                            8.577283
 TradingPair(125865 [neo_usd])    9.365281
 dtype: float64, const                            101.896976
 TradingPair(587847 [xrp_usd])    782.307220
 dtype: float64, const                              80.665955
 TradingPair(657702 [trx_usd])    8295.910034
 dtype: float64, const                             133.682956
 TradingPair(891235 [xlm_usd])    1562.967605
 dtype: float64, const                            3911.292088
 TradingPair(125865 [neo_usd])      69.093225
 dtype: float64, const                            4521.076243
 TradingPair(587847 [xrp_usd])    5862.459445
 dtype: float64, const                             4787.983219
 TradingPair(891235 [xlm_usd])    11643.045351
 dtype: float64, const                            11.112009
 TradingPair(587847 [xrp_usd])    82.206840
 dtype: float64, const                             13.7372

6660 short TradingPair(587847 [xrp_usd]) long TradingPair(891235 [xlm_usd])


KeyboardInterrupt: 

In [14]:
# Persist the backtest result for later analysis.
# The target directory is resolved from the current user's home directory
# instead of a hard-coded absolute path, so the cell also works on other
# machines; the directory is created when it does not exist yet.
# NOTE(review): the file name appears to encode run parameters (dates, fp20,
# nc5, ...) that differ from the values set in `initialize` and the backtest
# window above -- confirm the name before relying on it.
# NOTE(review): `performance` only exists if the backtest cell ran to
# completion; the captured output above ends in KeyboardInterrupt.
results_dir = Path.home() / 'StatArb'
results_dir.mkdir(parents=True, exist_ok=True)
performance.to_pickle(str(results_dir / 'perf_5.18_5.19_C_fp20_tp10_nc5_60_2_3_1'))