In [3]:
import ccxt
import pandas as pd

def fetch_ohlcv(exchange_name: str,
                symbol: str,
                since: int = None,
                limit: int = 1000) -> pd.DataFrame:
    exchange = getattr(ccxt, exchange_name)()
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe='1d', since=since, limit=limit)
    df = pd.DataFrame(ohlcv, columns=['timestamp','open','high','low','close','volume'])
    df['datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df[['open','high','low','close','volume']]

def get_pair_data(exchange_name: str,
                  symbol_a: str,
                  symbol_b: str,
                  since: int = None,
                  limit: int = 1000) -> pd.DataFrame:
    df_a = fetch_ohlcv(exchange_name, symbol_a, since, limit)
    df_b = fetch_ohlcv(exchange_name, symbol_b, since, limit)

    # align on datetime and take only the closing prices
    df = pd.concat([
        df_a['close'].rename(symbol_a),
        df_b['close'].rename(symbol_b)
    ], axis=1).dropna()

    return df

In [16]:
symbols = ['BTC/USDT', 'ETH/USDT', 'XRP/USDT', 'LTC/USDT', 'BCH/USDT', 'ADA/USDT', 'DOT/USDT', 'LINK/USDT', 'XLM/USDT', 'DOGE/USDT']

min_r2 = 0.7
adf_p_max = 0.05

In [17]:
dfs = {}
for symbol in symbols:
    exchange_name = 'binance'
    df = fetch_ohlcv(exchange_name, symbol, since=None, limit=1000)['close']
    dfs[symbol] = df

In [18]:
prices = pd.concat(dfs, axis=1).dropna()

In [28]:
import statsmodels.api as sm

results = []
for i in range(len(symbols)):
    for j in range(i + 1, len(symbols)):
        symbol_a = symbols[i]
        symbol_b = symbols[j]
        
        y = prices[symbol_a]
        X = sm.add_constant(prices[symbol_b])
        model = sm.OLS(y, X).fit()
        
        # filter by R^2
        if model.rsquared < min_r2:
            continue
        
        # compute ADF test
        residuals = model.resid
        adf_statistic, p_value, *_, critical_values = sm.tsa.adfuller(residuals)
        if p_value < adf_p_max:
            results.append({
                'pair': (symbol_a, symbol_b),
                'r2': model.rsquared,
                'adf_statistic': adf_statistic,
                'adf_p_value': p_value,
                'critical_values': critical_values
            })
        

In [29]:
results.sort(key=lambda x: x['adf_p_value'])

In [30]:
results

[{'pair': ('ETH/USDT', 'BCH/USDT'),
  'r2': np.float64(0.7305843005582797),
  'adf_statistic': np.float64(-3.5815212596540484),
  'adf_p_value': np.float64(0.006124288715088263),
  'critical_values': np.float64(11116.579630506896)},
 {'pair': ('ADA/USDT', 'XLM/USDT'),
  'r2': np.float64(0.7931667049439108),
  'adf_statistic': np.float64(-3.301408258119132),
  'adf_p_value': np.float64(0.014815642550319166),
  'critical_values': np.float64(-4468.022979612032)},
 {'pair': ('ADA/USDT', 'LINK/USDT'),
  'r2': np.float64(0.7217974327352055),
  'adf_statistic': np.float64(-3.02086896934438),
  'adf_p_value': np.float64(0.032990929934041964),
  'critical_values': np.float64(-4604.622409737383)},
 {'pair': ('XRP/USDT', 'XLM/USDT'),
  'r2': np.float64(0.867640515053189),
  'adf_statistic': np.float64(-2.9156730628788194),
  'adf_p_value': np.float64(0.04355555452528412),
  'critical_values': np.float64(-2603.483005676414)}]

Hence we have found candidates to use for our strategy. For continuation see [03_cointegration.ipynb](03_cointegration.ipynb).