In [1]:
import ccxt
import pandas as pd

def fetch_ohlcv(exchange_name: str,
                symbol: str,
                since: int = None,
                limit: int = 1000) -> pd.DataFrame:
    exchange = getattr(ccxt, exchange_name)()
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe='1d', since=since, limit=limit)
    df = pd.DataFrame(ohlcv, columns=['timestamp','open','high','low','close','volume'])
    df['datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df[['open','high','low','close','volume']]

def get_pair_data(exchange_name: str,
                  symbol_a: str,
                  symbol_b: str,
                  since: int = None,
                  limit: int = 1000) -> pd.DataFrame:
    df_a = fetch_ohlcv(exchange_name, symbol_a, since, limit)
    df_b = fetch_ohlcv(exchange_name, symbol_b, since, limit)

    # align on datetime and take only the closing prices
    df = pd.concat([
        df_a['close'].rename(symbol_a),
        df_b['close'].rename(symbol_b)
    ], axis=1).dropna()

    return df



In [2]:
symbols = ['BTC/USDT', 'ETH/USDT', 'XRP/USDT', 'LTC/USDT', 'BCH/USDT', 'ADA/USDT', 'DOT/USDT', 'LINK/USDT', 'XLM/USDT', 'DOGE/USDT']

min_r2 = 0.7
adf_p_max = 0.05

In [3]:
dfs = {}
for symbol in symbols:
    exchange_name = 'binance'
    df = fetch_ohlcv(exchange_name, symbol, since=None, limit=1000)['close']
    dfs[symbol] = df

In [4]:
prices = pd.concat(dfs, axis=1).dropna()

In [5]:
import statsmodels.api as sm

results = []
for i in range(len(symbols)):
    for j in range(i + 1, len(symbols)):
        symbol_a = symbols[i]
        symbol_b = symbols[j]
        
        y = prices[symbol_a]
        X = sm.add_constant(prices[symbol_b])
        model = sm.OLS(y, X).fit()
        
        # filter by R^2
        if model.rsquared < min_r2:
            continue
        
        # compute ADF test
        residuals = model.resid
        adf_statistic, p_value, *_, critical_values = sm.tsa.adfuller(residuals)
        if p_value < adf_p_max:
            results.append({
                'pair': (symbol_a, symbol_b),
                'r2': model.rsquared,
                'adf_statistic': adf_statistic,
                'adf_p_value': p_value,
                'critical_values': critical_values
            })
        

In [6]:
results.sort(key=lambda x: x['adf_p_value'])

In [7]:
results

[{'pair': ('ETH/USDT', 'BCH/USDT'),
  'r2': np.float64(0.7326093359048288),
  'adf_statistic': np.float64(-3.6033669331133926),
  'adf_p_value': np.float64(0.005696984499991305),
  'critical_values': np.float64(11119.340060667602)},
 {'pair': ('ADA/USDT', 'XLM/USDT'),
  'r2': np.float64(0.7928502623327882),
  'adf_statistic': np.float64(-3.2830405024648446),
  'adf_p_value': np.float64(0.015653795231479146),
  'critical_values': np.float64(-4469.682505416663)},
 {'pair': ('ADA/USDT', 'LINK/USDT'),
  'r2': np.float64(0.7232024437229555),
  'adf_statistic': np.float64(-3.0236361850627236),
  'adf_p_value': np.float64(0.03274526302667385),
  'critical_values': np.float64(-4603.367418068815)}]

In [8]:
prices

Unnamed: 0_level_0,BTC/USDT,ETH/USDT,XRP/USDT,LTC/USDT,BCH/USDT,ADA/USDT,DOT/USDT,LINK/USDT,XLM/USDT,DOGE/USDT
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-11-02,20151.84,1518.34,0.4497,60.95,113.7,0.3856,6.250,7.437,0.1075,0.12765
2022-11-03,20207.82,1531.01,0.4548,61.84,115.6,0.3895,6.410,7.720,0.1088,0.12262
2022-11-04,21148.52,1644.78,0.5064,67.58,124.5,0.4216,7.050,8.715,0.1156,0.12613
2022-11-05,21299.37,1626.98,0.4930,69.23,123.7,0.4264,7.030,8.663,0.1136,0.12445
2022-11-06,20905.58,1568.29,0.4709,68.26,116.7,0.4021,6.810,8.101,0.1090,0.11477
...,...,...,...,...,...,...,...,...,...,...
2025-07-24,118340.99,3706.94,3.1453,112.26,513.0,0.8060,4.000,17.910,0.4246,0.23218
2025-07-25,117614.31,3724.96,3.1423,113.78,557.3,0.8158,4.088,18.280,0.4313,0.23673
2025-07-26,117919.99,3741.10,3.1662,113.91,559.1,0.8198,4.124,18.410,0.4352,0.23543
2025-07-27,119415.55,3872.10,3.2389,114.78,590.0,0.8320,4.240,19.240,0.4442,0.24071


Hence we have found candidates to use for our strategy. For continuation see [03_cointegration.ipynb](03_cointegration.ipynb).