# Part 4: Error Correction Model (ECM)

## Imports

In [26]:
import requests
from abc import ABC, abstractmethod
import time
import binance
import pandas as pd
from requests.exceptions import ConnectTimeout
from tqdm import tqdm
import mplfinance as mpf
import os
import plotly.express as px
import numpy as np
from math import ceil
from scipy.stats import zscore
from statsmodels.robust.scale import mad
from scipy.stats import median_abs_deviation
import plotly.graph_objects as go
from scipy.stats import shapiro, skew, kurtosis, norm
from statsmodels.tsa.api import SimpleExpSmoothing
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from plotly.subplots import make_subplots
from statsmodels.tsa.stattools import adfuller
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint

In [2]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides DeprecationWarning / FutureWarning output, so
# narrow or remove the filter when debugging or upgrading libraries.
warnings.filterwarnings('ignore')

## Constants

### Time Intervals

In [3]:
from datetime import datetime

# Boundaries of the study window. Both datetimes are naive, so the
# .timestamp() calls below interpret them in the local timezone of the
# machine running the notebook.
start_date, finish_date = datetime(2023, 1, 1), datetime(2024, 1, 1)

# The same boundaries as Unix epoch values: whole seconds, then milliseconds.
start_seconds, finish_seconds = (
    int(bound.timestamp()) for bound in (start_date, finish_date)
)
start_ms, finish_ms = (
    int(bound.timestamp() * 1000) for bound in (start_date, finish_date)
)

### Folders

In [25]:
# Directory layout for the data files; every path keeps its trailing slash.
DATA_DIR = 'data/'
RAW_DATA_DIR = f'{DATA_DIR}raw data/'
ROBUST_DATA_DIR = f'{DATA_DIR}robust raw data/'
RESAMPLED_DATA_DIR = f'{DATA_DIR}resampled data/'

# Maps each time-scale key to its sub-folder of RESAMPLED_DATA_DIR.
TIME_SCALES = {
    scale: os.path.join(RESAMPLED_DATA_DIR, folder)
    for scale, folder in (
        ('1min', '1 Minute/'),
        ('5min', '5 Minutes/'),
        ('20min', '20 Minutes/'),
        ('1h', '1 Hour/'),
        ('1d', '1 Day/'),
        ('1month', '1 Month/'),
    )
}

## Load Data

In [13]:
def load_data(exchange, time_scale='1d'):
    """Read ``Last-{exchange}-USDTTMN.csv`` for one time scale.

    Parameters
    ----------
    exchange : str
        Exchange name interpolated into the CSV file name.
    time_scale : str, default '1d'
        Key of ``TIME_SCALES`` selecting the folder to read from.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with ``open_time`` converted to datetimes and used
        as the index.
    """
    csv_path = os.path.join(TIME_SCALES[time_scale], f'Last-{exchange}-USDTTMN.csv')
    prices = pd.read_csv(csv_path)
    parsed_times = pd.to_datetime(prices['open_time'])
    return prices.assign(open_time=parsed_times).set_index('open_time')

In [None]:
# Daily Tabdeal and Wallex frames, each restricted to the index labels
# that are present in both (inner join along the row axis).
df1, df2 = load_data('Tabdeal', time_scale='1d').align(
    load_data('Wallex', time_scale='1d'),
    join='inner',
    axis=0,
)

## Equilibrium Reversion

### ECM Model

In [29]:
def perform_cointegration_test(df1, df2):
    """Fit the long-run (levels) regression between two close-price series.

    The frames are inner-joined on their index and ``close_price_exchange1``
    is regressed by OLS on a constant plus ``close_price_exchange2``. The
    residuals of that fit are stored as the equilibrium spread. No unit-root
    test is run on the residuals here, so this function on its own does not
    establish cointegration.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Price frames sharing an index. After the merge the columns
        ``close_price_exchange1`` and ``close_price_exchange2`` must exist:
        either both inputs carry a ``close_price`` column (which receives the
        suffixes) or they already use the suffixed names.

    Returns
    -------
    model : statsmodels regression results
        The fitted levels regression.
    merged_data : pandas.DataFrame
        The merged frame with an added ``residuals`` column.
    """
    merged_data = pd.merge(df1, df2, left_index=True, right_index=True,
                           suffixes=('_exchange1', '_exchange2'))

    X = sm.add_constant(merged_data['close_price_exchange2'])
    model = sm.OLS(merged_data['close_price_exchange1'], X).fit()

    # Take the residuals from the fit itself. Rebuilding them from
    # params[0] / params[1] uses integer keys positionally on a label-indexed
    # Series, which pandas has deprecated.
    merged_data['residuals'] = model.resid

    return model, merged_data

def calculate_ecm(merged_data):
    """Fit the short-run error correction regression.

    Regresses the first difference of ``close_price_exchange1`` on a constant,
    the first difference of ``close_price_exchange2`` and the one-period lag
    of ``residuals``. The coefficient on ``lagged_residuals`` is the
    adjustment (reversion) speed.

    Parameters
    ----------
    merged_data : pandas.DataFrame
        Frame holding ``close_price_exchange1``, ``close_price_exchange2`` and
        ``residuals``. It is left unmodified.

    Returns
    -------
    ecm_model : statsmodels regression results
        The fitted error correction regression.
    ecm_data : pandas.DataFrame
        Copy of ``merged_data`` with ``delta_exchange1``, ``delta_exchange2``
        and ``lagged_residuals`` added, limited to the rows where those three
        columns are all present.
    """
    regression_columns = ['delta_exchange1', 'delta_exchange2', 'lagged_residuals']

    # assign() builds a new frame, so the caller's frame keeps its columns.
    with_ecm_terms = merged_data.assign(
        delta_exchange1=merged_data['close_price_exchange1'].diff(),
        delta_exchange2=merged_data['close_price_exchange2'].diff(),
        lagged_residuals=merged_data['residuals'].shift(1),
    )

    # Drop only the rows this regression cannot use (such as the first row,
    # which diff/shift leave as NaN). A bare dropna() would also discard rows
    # because of gaps in columns the regression never reads.
    ecm_data = with_ecm_terms.dropna(subset=regression_columns)

    X_ecm = sm.add_constant(ecm_data[['delta_exchange2', 'lagged_residuals']])
    ecm_model = sm.OLS(ecm_data['delta_exchange1'], X_ecm).fit()

    return ecm_model, ecm_data

def plot_results(merged_data, ecm_model, ecm_data, exchange1_name, exchange2_name):
    """Show three dark-themed line charts: prices, spread, and ECM variables.

    Parameters
    ----------
    merged_data : pandas.DataFrame
        Supplies ``close_price_exchange1``, ``close_price_exchange2`` and
        ``residuals``, plotted against its index.
    ecm_model : object
        Accepted for call compatibility; it is not read.
    ecm_data : pandas.DataFrame
        Supplies ``delta_exchange1``, ``delta_exchange2`` and
        ``lagged_residuals``, plotted against its index.
    exchange1_name, exchange2_name : str
        Labels used in the trace names.
    """
    def draw(frame, series, **layout):
        # One line trace per (column, label) pair, then apply the layout and display.
        chart = go.Figure()
        for column, label in series:
            chart.add_trace(go.Scatter(x=frame.index, y=frame[column],
                                       mode='lines', name=label))
        chart.update_layout(xaxis_title='Time', template='plotly_dark', **layout)
        chart.show()

    # Prices on both exchanges
    draw(
        merged_data,
        [('close_price_exchange1', f'{exchange1_name} Price'),
         ('close_price_exchange2', f'{exchange2_name} Price')],
        title='Price Data from Both Exchanges',
        yaxis_title='Price',
        legend_title='Exchange',
    )

    # Residual spread of the levels regression
    draw(
        merged_data,
        [('residuals', 'Cointegration Spread')],
        title='Cointegration Spread Over Time',
        yaxis_title='Spread',
    )

    # Inputs of the error correction regression
    draw(
        ecm_data,
        [('delta_exchange1', f'{exchange1_name} Delta'),
         ('delta_exchange2', f'{exchange2_name} Delta'),
         ('lagged_residuals', 'Lagged Residuals')],
        title='ECM Variables Over Time',
        yaxis_title='Value',
        legend_title='Variables',
    )

def cointegration_test_and_ecm(exchange1, exchange2, time_scale='1d'):
    """Run the levels regression and the ECM for one exchange pair and report them.

    Loads both exchanges at ``time_scale``, fits the levels regression with
    ``exchange1`` as the dependent variable, fits the error correction
    regression on the resulting frame, prints both summaries and shows the
    plots. Nothing is returned.
    """
    frames = [load_data(name, time_scale) for name in (exchange1, exchange2)]

    levels_fit, spread_frame = perform_cointegration_test(*frames)
    short_run_fit, short_run_frame = calculate_ecm(spread_frame)

    reports = (
        ("Cointegration Model Summary:", levels_fit),
        ("ECM Model Summary:", short_run_fit),
    )
    for heading, fit in reports:
        print(heading)
        print(fit.summary())

    plot_results(spread_frame, short_run_fit, short_run_frame, exchange1, exchange2)

In [31]:
# Daily-scale run on the whole loaded sample: Nobitex (exchange1) is the
# dependent variable and Tabdeal (exchange2) the regressor.
cointegration_test_and_ecm('Nobitex', 'Tabdeal', time_scale='1d')

Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.997
Model:                               OLS   Adj. R-squared:                  0.997
Method:                    Least Squares   F-statistic:                 1.241e+05
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):               0.00
Time:                           18:37:39   Log-Likelihood:                -2393.7
No. Observations:                    366   AIC:                             4791.
Df Residuals:                        364   BIC:                             4799.
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------

### On Monthly Segments

In [47]:
def cointegration_test_and_ecm_by_month(exchange1, exchange2, time_scale='1d', min_observations=5):
    """Fit the levels regression and the ECM separately for each calendar month.

    Parameters
    ----------
    exchange1, exchange2 : str
        Exchange names passed to ``load_data``; ``exchange1`` is the
        dependent variable of both regressions.
    time_scale : str, default '1d'
        Time scale passed to ``load_data``.
    min_observations : int, default 5
        Months with fewer merged rows are skipped. The ECM estimates three
        coefficients on one row fewer than the month holds (differencing
        drops the first row), so five rows is the smallest month that leaves
        a residual degree of freedom; shorter months yield NaN p-values.

    Returns
    -------
    list of dict
        One entry per fitted month with keys ``month`` (the month-end
        timestamp of the group), ``model_summary``, ``ecm_summary``,
        ``ecm_speed``, ``ecm_pvalue`` and ``ecm_data``.
    """
    df1 = load_data(exchange1, time_scale)
    df2 = load_data(exchange2, time_scale)

    merged_data = pd.merge(df1, df2, left_index=True, right_index=True,
                           suffixes=('_exchange1', '_exchange2'))

    # Group with a MonthEnd offset object rather than the 'M' string alias,
    # which pandas 2.2 deprecated in favour of 'ME'; the offset object is
    # accepted regardless of which alias spelling the installed pandas uses.
    monthly_groups = merged_data.groupby(pd.Grouper(freq=pd.offsets.MonthEnd()))

    monthly_results = []

    for month, month_data in monthly_groups:
        if len(month_data) < min_observations:
            continue

        # The columns already carry the _exchange1/_exchange2 names, so the
        # merge inside perform_cointegration_test leaves them unchanged.
        model, month_data_with_residuals = perform_cointegration_test(
            month_data[['close_price_exchange1']],
            month_data[['close_price_exchange2']],
        )
        ecm_model, ecm_data = calculate_ecm(month_data_with_residuals)

        # Build each summary once and reuse it for storing and printing.
        model_summary = model.summary()
        ecm_summary = ecm_model.summary()
        ecm_speed = ecm_model.params['lagged_residuals']
        ecm_pvalue = ecm_model.pvalues['lagged_residuals']

        monthly_results.append({
            'month': month,
            'model_summary': model_summary,
            'ecm_summary': ecm_summary,
            'ecm_speed': ecm_speed,
            'ecm_pvalue': ecm_pvalue,
            'ecm_data': ecm_data
        })

        print(f"Month: {month.strftime('%Y-%m')}")
        print("Cointegration Model Summary:")
        print(model_summary)
        print("ECM Model Summary:")
        print(ecm_summary)
        print(f"Reversion Speed: {ecm_speed:.4f}, P-Value: {ecm_pvalue:.4f}")
        print("-" * 80)

        plot_results(month_data_with_residuals, ecm_model, ecm_data, exchange1, exchange2)

    return monthly_results

In [48]:
def analyze_reversion_dynamics(monthly_results, alpha=0.05):
    """Print and plot the monthly reversion speeds and their p-values.

    Parameters
    ----------
    monthly_results : list of dict
        Entries with the keys ``month`` (an object with ``strftime``),
        ``ecm_speed`` and ``ecm_pvalue``.
    alpha : float, default 0.05
        Significance level used for the significant-month count and for the
        closing verdict.

    Returns
    -------
    None

    Notes
    -----
    The mean p-value is a descriptive summary only: a mean below ``alpha``
    does not imply that each month is significant, so the number of months
    whose own p-value is below ``alpha`` is printed as well.
    """
    # Nothing to average or plot; the averages below would divide by zero.
    if not monthly_results:
        print("No monthly results to analyze.")
        return

    months = [result['month'] for result in monthly_results]
    reversion_speeds = [result['ecm_speed'] for result in monthly_results]
    p_values = [result['ecm_pvalue'] for result in monthly_results]

    for month, reversion_speed, p_value in zip(months, reversion_speeds, p_values):
        print(f"Month: {month.strftime('%Y-%m')}, Reversion Speed: {reversion_speed:.4f}, P-Value: {p_value:.4f}")

    # Plot the reversion speed over time
    fig_speed = go.Figure()
    fig_speed.add_trace(go.Scatter(x=months, y=reversion_speeds,
                                   mode='lines+markers', name='Reversion Speed'))
    fig_speed.update_layout(title='Reversion Speed Over Time (Monthly)',
                            xaxis_title='Month',
                            yaxis_title='Reversion Speed',
                            template='plotly_dark')
    fig_speed.show()

    # Plot the p-values over time
    fig_pvalue = go.Figure()
    fig_pvalue.add_trace(go.Scatter(x=months, y=p_values,
                                    mode='lines+markers', name='P-Value of Reversion Speed'))
    fig_pvalue.update_layout(title='P-Value of Reversion Speed Over Time (Monthly)',
                             xaxis_title='Month',
                             yaxis_title='P-Value',
                             template='plotly_dark')
    fig_pvalue.show()

    # Summary statistics across months
    avg_speed = sum(reversion_speeds) / len(reversion_speeds)
    avg_pvalue = sum(p_values) / len(p_values)
    significant_months = sum(1 for p_value in p_values if p_value < alpha)

    print(f"\nAverage Reversion Speed: {avg_speed:.4f}")
    print(f"Average P-Value of Reversion Speed: {avg_pvalue:.4f}")
    print(f"Months significant at the {alpha:g} level: {significant_months} of {len(p_values)}")

    if avg_pvalue < alpha:
        print("The reversion speed is generally significant across the months.")
    else:
        print("The reversion speed is generally not significant across the months, suggesting that deviations from equilibrium may not always be corrected.")

In [49]:
# Per-month daily run. Note that the exchange order is reversed relative to the
# whole-sample call earlier in the notebook: here Tabdeal (exchange1) is the
# dependent variable and Nobitex (exchange2) the regressor.
monthly_results = cointegration_test_and_ecm_by_month('Tabdeal', 'Nobitex', time_scale='1d')

Month: 2023-01
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.994
Model:                               OLS   Adj. R-squared:                  0.994
Method:                    Least Squares   F-statistic:                     4854.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           7.70e-34
Time:                           19:12:34   Log-Likelihood:                -196.58
No. Observations:                     31   AIC:                             397.2
Df Residuals:                         29   BIC:                             400.0
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-02
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.997
Model:                               OLS   Adj. R-squared:                  0.997
Method:                    Least Squares   F-statistic:                     8727.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           2.18e-34
Time:                           19:12:34   Log-Likelihood:                -193.90
No. Observations:                     28   AIC:                             391.8
Df Residuals:                         26   BIC:                             394.5
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-03
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.995
Model:                               OLS   Adj. R-squared:                  0.995
Method:                    Least Squares   F-statistic:                     5683.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           7.93e-35
Time:                           19:12:34   Log-Likelihood:                -207.12
No. Observations:                     31   AIC:                             418.2
Df Residuals:                         29   BIC:                             421.1
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-04
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.973
Model:                               OLS   Adj. R-squared:                  0.972
Method:                    Least Squares   F-statistic:                     1006.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           1.72e-23
Time:                           19:12:34   Log-Likelihood:                -198.29
No. Observations:                     30   AIC:                             400.6
Df Residuals:                         28   BIC:                             403.4
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-05
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.990
Model:                               OLS   Adj. R-squared:                  0.989
Method:                    Least Squares   F-statistic:                     2754.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           2.69e-30
Time:                           19:12:34   Log-Likelihood:                -195.89
No. Observations:                     31   AIC:                             395.8
Df Residuals:                         29   BIC:                             398.6
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-06
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.970
Model:                               OLS   Adj. R-squared:                  0.969
Method:                    Least Squares   F-statistic:                     920.8
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           5.76e-23
Time:                           19:12:34   Log-Likelihood:                -199.37
No. Observations:                     30   AIC:                             402.7
Df Residuals:                         28   BIC:                             405.5
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-07
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.965
Model:                               OLS   Adj. R-squared:                  0.964
Method:                    Least Squares   F-statistic:                     801.8
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           1.11e-22
Time:                           19:12:34   Log-Likelihood:                -194.50
No. Observations:                     31   AIC:                             393.0
Df Residuals:                         29   BIC:                             395.9
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-08
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.947
Model:                               OLS   Adj. R-squared:                  0.945
Method:                    Least Squares   F-statistic:                     513.4
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           5.44e-20
Time:                           19:12:34   Log-Likelihood:                -177.95
No. Observations:                     31   AIC:                             359.9
Df Residuals:                         29   BIC:                             362.8
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-09
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.925
Model:                               OLS   Adj. R-squared:                  0.922
Method:                    Least Squares   F-statistic:                     344.1
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           2.89e-17
Time:                           19:12:34   Log-Likelihood:                -167.80
No. Observations:                     30   AIC:                             339.6
Df Residuals:                         28   BIC:                             342.4
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-10
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.997
Model:                               OLS   Adj. R-squared:                  0.997
Method:                    Least Squares   F-statistic:                     9634.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           3.88e-38
Time:                           19:12:34   Log-Likelihood:                -163.73
No. Observations:                     31   AIC:                             331.5
Df Residuals:                         29   BIC:                             334.3
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-11
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.988
Model:                               OLS   Adj. R-squared:                  0.987
Method:                    Least Squares   F-statistic:                     2264.
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           2.48e-28
Time:                           19:12:35   Log-Likelihood:                -159.87
No. Observations:                     30   AIC:                             323.7
Df Residuals:                         28   BIC:                             326.5
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

Month: 2023-12
Cointegration Model Summary:
                              OLS Regression Results                             
Dep. Variable:     close_price_exchange1   R-squared:                       0.959
Model:                               OLS   Adj. R-squared:                  0.958
Method:                    Least Squares   F-statistic:                     682.2
Date:                   Sun, 11 Aug 2024   Prob (F-statistic):           1.06e-21
Time:                           19:12:35   Log-Likelihood:                -165.06
No. Observations:                     31   AIC:                             334.1
Df Residuals:                         29   BIC:                             337.0
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

In [45]:
# Print and plot the per-month reversion speeds and p-values held in monthly_results.
analyze_reversion_dynamics(monthly_results)

Month: 2023-01, Reversion Speed: -0.8557, P-Value: 0.0002
Month: 2023-02, Reversion Speed: -0.7859, P-Value: 0.0013
Month: 2023-03, Reversion Speed: -0.9844, P-Value: 0.0007
Month: 2023-04, Reversion Speed: -0.5367, P-Value: 0.0068
Month: 2023-05, Reversion Speed: -0.5803, P-Value: 0.0021
Month: 2023-06, Reversion Speed: -0.9013, P-Value: 0.0000
Month: 2023-07, Reversion Speed: -0.7267, P-Value: 0.0006
Month: 2023-08, Reversion Speed: -0.8794, P-Value: 0.0001
Month: 2023-09, Reversion Speed: -0.9492, P-Value: 0.0000
Month: 2023-10, Reversion Speed: -0.9118, P-Value: 0.0000
Month: 2023-11, Reversion Speed: -0.9016, P-Value: 0.0001
Month: 2023-12, Reversion Speed: -0.6744, P-Value: 0.0008



Average Reversion Speed: -0.8073
Average P-Value of Reversion Speed: 0.0011
The reversion speed is generally significant across the months.


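- The reversion speed is the (negative) coefficient on the lagged residual. A larger magnitude, i.e. a coefficient closer to -1, suggests that the market corrects deviations from equilibrium more quickly,
indicating a more efficient market during those periods.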

- Slower reversion speeds (coefficients closer to 0) mean that deviations persist longer, which could indicate short-term inefficiencies that might be exploitable through trading strategies.