In [1]:
import MetaTrader5 as mt5 
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller 
from datetime import datetime
mt5.initialize()
# Replace following with your MT5 Account Login
account=51434456 # 
password="9UpBvVzc"
server = 'ICMarkets-Demo'
def get_rates(pair1, timeframe, x):
    pair1 = pd.DataFrame(mt5.copy_rates_from_pos(pair1, timeframe, 0, x))
    pair1['time'] = pd.to_datetime(pair1['time'], unit = 's')
    return pair1[['time','open', 'high', 'low', 'close']].set_index('time')

def compute_spread(p1, p2, tf, x):
    data1 = get_rates(p1, tf, x)
    data2 = get_rates(p2, tf, x)
    merged = data1.join(data2, lsuffix="_x", rsuffix="_y")
    spread = merged['close_x'] - merged['close_y']
    return spread.dropna()

def adf_test(spread):
    '''Runs ADF test on a spread series'''
    result = adfuller(spread)
    return {'ADF Statistic': result[0], 'p-value': result[1], 'Critical Values': result[4]}

In [14]:
def get_pair_correlations(symbol1, symbol2, window):
    s1 = str(symbol1)
    s2 = str(symbol2)
    symbol1 = get_rates(symbol1, mt5.TIMEFRAME_D1, 5000)
    symbol2 = get_rates(symbol2, mt5.TIMEFRAME_D1, 5000)

    combined_df = pd.concat([symbol1['close'].rename(f'{s1}_close'),
                            symbol2['close'].rename(f'{s2}_close')], axis=1)

    window_size = window  # Change this to the size of the window you want
    combined_df['rolling_corr'] = combined_df[f'{s1}_close'].rolling(window=window_size).corr(combined_df[f'{s2}_close'])
    combined_df['spread'] = combined_df[f'{s1}_close'] - combined_df[f'{s2}_close'] 
    combined_df[f'{s1}_return'] = combined_df[f'{s1}_close'].pct_change()
    combined_df[f'{s2}_return'] = combined_df[f'{s2}_close'].pct_change()
    combined_df['diff'] = combined_df[f'{s1}_return'] - combined_df[f'{s2}_return']
    combined_df['rolling_corr_returns'] = combined_df['rolling_corr'].rolling(window=window_size).corr(combined_df['diff'])
    combined_df['rolling_var'] = combined_df['EURUSD.a_close'].rolling(window = 25).var()
    combined_df['MA_Ratio'] = combined_df['spread'].rolling(window = 15).mean() / combined_df['spread'].rolling(window = 75).mean()
    return combined_df.dropna()

In [15]:
EU_GU = get_pair_correlations('EURUSD.a', 'GBPUSD.a', 5)

In [16]:
granger_df = pd.DataFrame()
for i in range(1,4):
    granger_df[f'MA_Ratio_{i}'] = EU_GU['MA_Ratio'].shift(i)

for i in range(6, 9):
    granger_df[f'rolling_corr_ret_{i}'] = EU_GU['rolling_corr_returns'].shift(i)

granger_df['rolling_var_3'] = EU_GU['rolling_var'].shift(3)
granger_df['rolling_var_4'] = EU_GU['rolling_var'].shift(4)
granger_df['spread'] = EU_GU['spread']
granger_df = granger_df.dropna()

In [17]:
df = granger_df

In [18]:
import warnings
warnings.filterwarnings("ignore")

In [19]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np

# Assuming 'df' is the DataFrame with your data

# Split the data into training and test sets
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=False)

# ARIMAX Model - using p, d, q values as 1 for demonstration; these should be determined by analysis
# Extracting the target variable and the features
y_train = train_df['spread']
X_train = train_df.drop('spread', axis=1)
y_test = test_df['spread']
X_test = test_df.drop('spread', axis=1)

# Building the ARIMAX model
arimax_model = SARIMAX(y_train, exog=X_train, order=(1, 1, 1))
arimax_result = arimax_model.fit()

# Making predictions
arimax_predictions = arimax_result.get_forecast(steps=len(y_test), exog=X_test).predicted_mean
arimax_mse = mean_squared_error(y_test, arimax_predictions)

# Random Forest Model - Hyperparameter tuning
rf = RandomForestRegressor(random_state=42)

# Set up the parameters for grid search; these ranges are quite small for demonstration purposes
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2, 4]
}

# Perform grid search
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Best Random Forest model after grid search
best_rf = grid_search.best_estimator_

# Making predictions
rf_predictions = best_rf.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)

In [20]:
(arimax_mse, rf_mse, grid_search.best_params_)

(0.0028157274763189323,
 0.02710045271108657,
 {'max_depth': 5,
  'min_samples_leaf': 4,
  'min_samples_split': 2,
  'n_estimators': 50})

In [22]:
df['spread']

time
2004-11-26   -0.56360
2004-11-29   -0.56640
2004-11-30   -0.58040
2004-12-01   -0.60000
2004-12-02   -0.59720
               ...   
2023-10-30   -0.15538
2023-10-31   -0.15716
2023-11-01   -0.15818
2023-11-02   -0.15816
2023-11-03   -0.16526
Name: spread, Length: 4843, dtype: float64