# Part 3 (Cointegration Analysis)

## Imports

In [10]:
import requests
from abc import ABC, abstractmethod
import time
import binance
import pandas as pd
from requests.exceptions import ConnectTimeout
from tqdm import tqdm
import mplfinance as mpf
import os
import plotly.express as px
import numpy as np
from math import ceil
from scipy.stats import zscore
from statsmodels.robust.scale import mad
from scipy.stats import median_abs_deviation
import plotly.graph_objects as go
from scipy.stats import shapiro, skew, kurtosis, norm
from statsmodels.tsa.api import SimpleExpSmoothing
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from plotly.subplots import make_subplots
from statsmodels.tsa.stattools import adfuller
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint

In [2]:
import warnings

# Silence every warning category for the rest of the notebook session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries used below -- narrow the filter if those should stay visible.
warnings.simplefilter('ignore')

## Constants

### Time Intervals

In [3]:
from datetime import datetime

# Analysis window as naive datetimes. `datetime.timestamp()` interprets naive
# values in the machine's local time zone, so the derived epoch values below
# depend on where the notebook is run.
start_date, finish_date = datetime(2023, 1, 1), datetime(2024, 1, 1)

# Same window expressed as whole epoch seconds ...
start_seconds, finish_seconds = (
    int(moment.timestamp()) for moment in (start_date, finish_date)
)

# ... and as whole epoch milliseconds.
start_ms, finish_ms = (
    int(moment.timestamp() * 1000) for moment in (start_date, finish_date)
)

### Folders

In [4]:
# Root of the on-disk data tree; every path keeps its trailing slash so that
# file names can be appended with plain string concatenation.
DATA_DIR = 'data/'
RAW_DATA_DIR = f'{DATA_DIR}raw data/'
ROBUST_DATA_DIR = f'{DATA_DIR}robust raw data/'

# Resampled series live in one sub-folder per bar size.
RESAMPLED_DATA_DIR = f'{DATA_DIR}resampled data/'
RESAMPLED_1MIN_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Minute/'
RESAMPLED_5MIN_DATA_DIR = f'{RESAMPLED_DATA_DIR}5 Minutes/'
RESAMPLED_20MIN_DATA_DIR = f'{RESAMPLED_DATA_DIR}20 Minutes/'
RESAMPLED_1H_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Hour/'
RESAMPLED_1D_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Day/'
RESAMPLED_1MONTH_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Month/'

## Load Data

In [None]:
def load_data(exchange1_file, exchange2_file):
    """Load two exchanges' close prices and align them on ``open_time``.

    Parameters
    ----------
    exchange1_file, exchange2_file : str or path-like
        CSV files that each contain at least an ``open_time`` column
        (parseable by ``pd.to_datetime``) and a ``close_price`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``open_time``, ``close_price_exchange1`` and
        ``close_price_exchange2``, one row per timestamp present in both
        files (inner join), sorted chronologically with a fresh 0..n-1 index.
        The frame is empty when the two files share no timestamps.

    Raises
    ------
    KeyError
        If either file lacks the ``open_time`` or ``close_price`` column.
    """
    price_frames = []
    for csv_path in (exchange1_file, exchange2_file):
        frame = pd.read_csv(csv_path)[['open_time', 'close_price']].copy()
        frame['open_time'] = pd.to_datetime(frame['open_time'])
        price_frames.append(frame)

    merged_data = pd.merge(price_frames[0], price_frames[1],
                           on='open_time', suffixes=('_exchange1', '_exchange2'))

    # An inner merge keeps the row order of the first file. The downstream
    # time-series tests and line plots assume chronological order, so enforce
    # it here instead of trusting the CSV layout. The stable sort leaves
    # already-sorted input (and ties) exactly as they were.
    return merged_data.sort_values('open_time', kind='stable').reset_index(drop=True)

## Monthly Cointegration Test

In [25]:
def perform_monthly_cointegration_tests(merged_data):
    """Run a cointegration test and an OLS hedge regression per calendar month.

    For every month present in ``merged_data['open_time']`` the function

    1. skips the month (printing a notice) if either price series is constant,
    2. runs ``coint`` on exchange 1 vs. exchange 2 close prices,
    3. fits ``exchange1 = alpha + beta * exchange2`` by OLS, and
    4. records the residual spread ``exchange1 - (alpha + beta * exchange2)``.

    The spread of each month uses the parameters fitted on that same month,
    i.e. it is an in-sample residual.

    Parameters
    ----------
    merged_data : pandas.DataFrame
        Must contain a datetime-like ``open_time`` column plus
        ``close_price_exchange1`` and ``close_price_exchange2``.
        The frame is not modified.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``monthly_results`` with columns ``month``, ``coint_score``,
        ``p_value``, ``alpha``, ``beta`` (one row per tested month), and
        ``spread_data`` with columns ``open_time`` and ``spread``.
        Both are empty, but keep these columns, when no month could be tested.
    """
    result_columns = ['month', 'coint_score', 'p_value', 'alpha', 'beta']
    monthly_results = []
    spread_data_list = []

    # Group on a derived key rather than writing a 'month' column into the
    # caller's frame, so the input is left untouched.
    month_key = merged_data['open_time'].dt.to_period('M').rename('month')

    for month, group in merged_data.groupby(month_key):
        exchange1_prices = group['close_price_exchange1']
        exchange2_prices = group['close_price_exchange2']

        # A constant series cannot be tested or regressed against.
        if exchange1_prices.nunique() == 1 or exchange2_prices.nunique() == 1:
            print(f"Skipping month {month}: one of the series is constant.")
            continue

        coint_score, p_value, _ = coint(exchange1_prices, exchange2_prices)

        X = sm.add_constant(exchange2_prices)
        model = sm.OLS(exchange1_prices, X).fit()
        # `params` is a label-indexed Series (constant first, then the slope);
        # use .iloc because integer keys on a labelled Series are deprecated
        # as positional access in recent pandas.
        alpha = model.params.iloc[0]
        beta = model.params.iloc[1]

        spread = exchange1_prices - (alpha + beta * exchange2_prices)
        spread_data_list.append(pd.DataFrame({'open_time': group['open_time'], 'spread': spread}))

        monthly_results.append({
            'month': month,
            'coint_score': coint_score,
            'p_value': p_value,
            'alpha': alpha,
            'beta': beta
        })

    # pd.concat raises on an empty list, which happens when every month was
    # skipped; return a correctly-shaped empty frame instead.
    if spread_data_list:
        spread_data = pd.concat(spread_data_list, ignore_index=True)
    else:
        spread_data = pd.DataFrame(columns=['open_time', 'spread'])

    return pd.DataFrame(monthly_results, columns=result_columns), spread_data

## Plot

In [31]:
def plot_monthly_results(results, spread_data, merged_data):
    """Show three Plotly figures summarising the monthly analysis.

    1. Close prices of both exchanges over time (from ``merged_data``).
    2. Test statistic, intercept and slope per month (from ``results``),
       all drawn on one shared y-axis.
    3. The regression spread over time (from ``spread_data``).

    Parameters
    ----------
    results : pandas.DataFrame
        Needs ``month``, ``coint_score``, ``alpha`` and ``beta`` columns.
    spread_data : pandas.DataFrame
        Needs ``open_time`` and ``spread`` columns.
    merged_data : pandas.DataFrame
        Needs ``open_time``, ``close_price_exchange1`` and
        ``close_price_exchange2`` columns.

    Returns
    -------
    None
        The figures are displayed via ``Figure.show()``.
    """
    # --- Figure 1: raw prices of both exchanges --------------------------
    price_series = (
        ('close_price_exchange1', 'Exchange 1 Price'),
        ('close_price_exchange2', 'Exchange 2 Price'),
    )
    fig_prices = go.Figure()
    for column, label in price_series:
        fig_prices.add_trace(
            go.Scatter(
                x=merged_data['open_time'],
                y=merged_data[column],
                mode='lines',
                name=label,
            )
        )
    fig_prices.update_layout(
        title='Price Data from Both Exchanges',
        xaxis_title='Time',
        yaxis_title='Price',
        legend_title='Exchange',
        template='plotly_dark',
    )
    fig_prices.show()

    # --- Figure 2: per-month test statistic and regression parameters ----
    monthly_metrics = (
        ('coint_score', 'Cointegration Test Statistic'),
        ('alpha', 'Alpha (Intercept)'),
        ('beta', 'Beta (Slope)'),
    )
    fig = go.Figure()
    month_labels = results['month'].astype(str)  # periods -> axis labels
    for column, label in monthly_metrics:
        fig.add_trace(
            go.Scatter(
                x=month_labels,
                y=results[column],
                mode='lines+markers',
                name=label,
            )
        )
    fig.update_layout(
        title='Monthly Cointegration Analysis',
        xaxis_title='Month',
        yaxis_title='Value',
        legend_title='Metrics',
        template='plotly_dark',
    )
    fig.show()

    # --- Figure 3: regression spread -------------------------------------
    fig_spread = go.Figure()
    spread_trace = go.Scatter(
        x=spread_data['open_time'],
        y=spread_data['spread'],
        mode='lines',
        name='Cointegration Spread',
    )
    fig_spread.add_trace(spread_trace)
    fig_spread.update_layout(
        title='Cointegration Spread Over Time',
        xaxis_title='Time',
        yaxis_title='Spread',
        template='plotly_dark',
    )
    fig_spread.show()

In [32]:
def cointegration_test(exchange1_file, exchange2_file):
    """Run the monthly cointegration workflow for one pair of price files.

    Loads and aligns the two CSV files, runs the per-month tests, shows the
    plots and prints the monthly results table. When the files share no
    timestamps, a message is printed and nothing else happens.

    Parameters
    ----------
    exchange1_file, exchange2_file : str
        Paths passed straight to ``load_data``.

    Returns
    -------
    None
    """
    aligned_prices = load_data(exchange1_file, exchange2_file)

    if not aligned_prices.empty:
        results_by_month, spread = perform_monthly_cointegration_tests(aligned_prices)
        plot_monthly_results(results_by_month, spread, aligned_prices)
        print(results_by_month)
    else:
        print("No overlapping data found between the datasets.")

In [33]:
# Nobitex vs. Tabdeal, USDTTMN files from the 1-day resampled folder.
cointegration_test(
    exchange1_file=RESAMPLED_1D_DATA_DIR + 'Last-Nobitex-USDTTMN.csv',
    exchange2_file=RESAMPLED_1D_DATA_DIR + 'Last-Tabdeal-USDTTMN.csv',
)

Skipping month 2024-01: one of the series is constant.


      month  coint_score   p_value        alpha      beta
0   2023-01    -4.852926  0.000304  -984.602409  1.018140
1   2023-02    -4.047185  0.006144   340.968603  0.987186
2   2023-03    -3.507742  0.031715   262.236486  0.992259
3   2023-04    -4.615319  0.000787   521.550764  0.987163
4   2023-05    -2.392808  0.328269  1247.295232  0.973534
5   2023-06    -5.601717  0.000011  1891.861115  0.960429
6   2023-07    -4.665256  0.000647  -747.925192  1.013551
7   2023-08    -4.715938  0.000529 -5289.564140  1.106266
8   2023-09    -1.821566  0.619189  2985.259848  0.939418
9   2023-10    -5.105441  0.000105   -32.500696  1.000921
10  2023-11    -2.518035  0.271174  -438.839134  1.008865
11  2023-12    -2.607879  0.233667 -2061.576715  1.040392


In [34]:
# Wallex vs. Tabdeal, USDTTMN files from the 1-day resampled folder.
cointegration_test(
    exchange1_file=RESAMPLED_1D_DATA_DIR + 'Last-Wallex-USDTTMN.csv',
    exchange2_file=RESAMPLED_1D_DATA_DIR + 'Last-Tabdeal-USDTTMN.csv',
)

Skipping month 2024-01: one of the series is constant.


      month  coint_score       p_value        alpha      beta
0   2023-01    -5.301437  4.426684e-05  -428.822748  1.008010
1   2023-02    -3.711604  1.770536e-02  -494.525947  1.007305
2   2023-03    -4.880501  2.713988e-04   302.116258  0.991645
3   2023-04    -4.145698  4.402218e-03  -916.845232  1.015927
4   2023-05    -5.834612  3.660152e-06  -533.150491  1.009586
5   2023-06    -5.196270  7.058760e-05  1393.869984  0.970199
6   2023-07    -6.831000  2.154283e-08  1350.679247  0.971969
7   2023-08    -4.442154  1.521911e-03  -758.557614  1.015108
8   2023-09    -4.834387  3.280477e-04  1075.610035  0.978741
9   2023-10    -6.313804  3.315847e-07  1043.148333  0.980144
10  2023-11    -4.582710  8.926626e-04   188.717635  0.996431
11  2023-12    -3.683339  1.924879e-02 -6302.564008  1.124693


In [35]:
# Nobitex vs. Wallex, USDTTMN files from the 1-day resampled folder.
cointegration_test(
    exchange1_file=RESAMPLED_1D_DATA_DIR + 'Last-Nobitex-USDTTMN.csv',
    exchange2_file=RESAMPLED_1D_DATA_DIR + 'Last-Wallex-USDTTMN.csv',
)

Skipping month 2024-01: one of the series is constant.


      month  coint_score       p_value        alpha      beta
0   2023-01    -3.525271  3.021993e-02  -404.758102  1.006600
1   2023-02    -3.034283  1.022796e-01   817.199356  0.980198
2   2023-03    -4.691562  5.830735e-04    56.751446  0.998732
3   2023-04    -3.548096  2.836335e-02  2338.382593  0.954063
4   2023-05    -2.043881  5.057059e-01  2033.054584  0.959182
5   2023-06    -5.816201  4.002427e-06   546.105485  0.989248
6   2023-07    -3.186725  7.217597e-02 -2184.061447  1.043339
7   2023-08    -4.654978  6.736232e-04 -3797.513954  1.076424
8   2023-09    -6.258775  4.399119e-07  2679.925507  0.945322
9   2023-10    -6.916349  1.355487e-08  -766.518292  1.014745
10  2023-11    -1.906017  5.769520e-01    18.238771  0.999832
11  2023-12    -4.466156  1.391236e-03  5731.961513  0.886224
