# IMPORT LIBRARIES

In [1]:
import MetaTrader5 as mt5

import pandas as pd

import pytz

from datetime import datetime

from sklearn.model_selection import train_test_split

import seaborn as sns

import matplotlib.pyplot as plt

from scipy.stats import entropy
from scipy.stats import ks_2samp

# CONFIG

In [2]:
# Show floats with five decimal places whenever pandas renders them.
pd.set_option('display.float_format', '{:.5f}'.format)

In [3]:
# Time zone object attached to both boundaries below so they are timezone-aware (UTC).
gc_o_TIME_ZONE = pytz.timezone("Etc/UTC")
# Start and end of the history window that dfFetchSampleDataFromMt passes to
# mt5.copy_rates_range.
gc_dt_FROM = datetime(2022, 3, 1, tzinfo=gc_o_TIME_ZONE)
gc_dt_TO = datetime(2022, 7, 1, tzinfo=gc_o_TIME_ZONE)

In [4]:
# Column names selected for analysis. 'return' is the column added by
# dfCalculateReturn; the others are presumably columns of the fetched rates
# frame — TODO confirm against the data returned by MetaTrader 5.
gc_a_FEATURES_TO_ANALYZE = ['close','open', 'high', 'low','real_volume','return']

In [5]:
# Fractions of the dataset assigned to each partition; read by ixSplitDateset.
# NOTE(review): presumably meant to sum to 1.0 — nothing in the visible code
# enforces that, so confirm when changing any of them.
gc_f_TRAINING_RATIO = 0.60
gc_f_VALIDATION_RATIO = 0.20
gc_f_TEST_RATIO = 0.20

# USER-DEFINED FUNCTIONS (UDFS)

In [6]:
def dfFetchSampleDataFromMt(p_sSymbolName, p_iMaxAttempts=5):
    """Fetch M15 bars for one symbol from MetaTrader 5 as a DataFrame indexed by 'time'.

    The request covers the window gc_dt_FROM .. gc_dt_TO. When the terminal
    does not deliver usable data (the resulting frame has no 'time' column),
    the request is retried a limited number of times instead of recursing
    without bound, which previously ended in a RecursionError for symbols
    that never return data.

    Parameters
    ----------
    p_sSymbolName : str
        Name of the symbol to request.
    p_iMaxAttempts : int, optional
        Maximum number of requests to issue before giving up (at least one
        request is always made).

    Returns
    -------
    pandas.DataFrame
        The bars indexed by 'time', or an empty DataFrame when every attempt
        failed. Callers should check the length of the result.
    """
    for _ in range(max(1, p_iMaxAttempts)):
        aOhlSample = mt5.copy_rates_range(
            p_sSymbolName,
            mt5.TIMEFRAME_M15,
            gc_dt_FROM,
            gc_dt_TO
        )

        dfToReturn = pd.DataFrame(aOhlSample)
        # A failed request yields a frame without the expected columns.
        if 'time' in dfToReturn.columns:
            return dfToReturn.set_index('time')

    print("copy_rates_range() failed for", p_sSymbolName, ", error code =", mt5.last_error())
    return pd.DataFrame()

In [7]:
def dfCalculateReturn(p_dfToSplit):
    """Add a 'return' column holding the relative open-to-close change of each row.

    The value is (close - open) / open. The column is written onto the frame
    that was passed in, and that same frame object is returned.
    """
    srsOpen = p_dfToSplit['open']
    srsClose = p_dfToSplit['close']
    p_dfToSplit['return'] = (srsClose - srsOpen) / srsOpen
    return p_dfToSplit

In [8]:
def ixSplitDateset(p_dfToSplit):
    """Split the frame's index, in its existing order, into train / validation / test parts.

    Two consecutive unshuffled calls to train_test_split are used: the first
    separates the training part from the remainder (the holdout), the second
    divides that holdout between validation and test. The sizes come from
    gc_f_TRAINING_RATIO, gc_f_VALIDATION_RATIO and gc_f_TEST_RATIO.

    Returns the three index parts as (train, validation, test).
    """
    # Share of all rows that is not used for training.
    fHoldoutShare = 1-gc_f_TRAINING_RATIO
    # Share of the holdout that goes to the test part.
    fTestShareOfHoldout = gc_f_TEST_RATIO/(gc_f_TEST_RATIO + gc_f_VALIDATION_RATIO)

    ixTrain, ixHoldout = train_test_split(
        p_dfToSplit.index,
        test_size=fHoldoutShare,
        shuffle=False)

    ixValidation, ixTest = train_test_split(
        ixHoldout,
        test_size=fTestShareOfHoldout,
        shuffle=False)

    return ixTrain, ixValidation, ixTest

# MAIN

In [9]:
# Establish the connection to the MetaTrader 5 terminal.
# Raise instead of calling quit(): in a notebook quit() shuts the kernel down,
# whereas an exception stops the run at this cell and keeps the message visible.
if not mt5.initialize():
    raise RuntimeError(f"initialize() failed, error code = {mt5.last_error()}")

# Load every symbol known to the terminal; the field names of the first entry
# become the column names of the symbol table.
tplSymbols = mt5.symbols_get()
if not tplSymbols:
    # Covers both a failed call and an empty symbol list, either of which
    # would otherwise break the tplSymbols[0] lookup below.
    raise RuntimeError(f"symbols_get() returned no symbols, error code = {mt5.last_error()}")
dfSymbols = pd.DataFrame(tplSymbols, columns = tplSymbols[0]._asdict().keys())

In [10]:
def aGetUniqueCategories(tplSymbols):
    """Return the distinct category prefixes of the symbols' paths, in first-seen order.

    A category is a symbol's 'path' with its last backslash-separated
    component removed and a trailing backslash appended.
    """
    def sCategoryOf(oSymbol):
        aParts = oSymbol._asdict()['path'].split('\\')
        return '\\'.join(aParts[:-1]) + '\\'

    # dict keys are unique and keep insertion order, so this de-duplicates
    # while preserving the order in which categories first appear.
    return list(dict.fromkeys(sCategoryOf(oSymbol) for oSymbol in tplSymbols))

In [14]:
aCategories = aGetUniqueCategories(tplSymbols)

# NOTE(review): the category is chosen by position (fifth from the end), which
# depends on the order of symbols reported by the terminal — confirm it still
# selects the intended category.
sCategory = aCategories[-5]
# na=False treats a missing path as "no match".
dfFilteredSymbols = dfSymbols[dfSymbols['path'].str.contains(sCategory, regex=False, na=False)]

# Collect the per-symbol frames and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously collected rows on every iteration.
aOhlcSamples = []

for sSymbolName in dfFilteredSymbols['name']:
    dfOhlcSample = dfFetchSampleDataFromMt(sSymbolName)
    # Only keep symbols with more than 1000 bars in the requested window.
    if len(dfOhlcSample) > 1000:
        dfOhlcSample = dfCalculateReturn(dfOhlcSample)
        dfOhlcSample['SYMBOL_NAME'] = sSymbolName
        aOhlcSamples.append(dfOhlcSample)

# pd.concat raises on an empty list, so fall back to an empty frame.
dfOhlc = pd.concat(aOhlcSamples) if aOhlcSamples else pd.DataFrame()