In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta,date
from dateutil.relativedelta import *
from xapi.enums import PeriodCode
from DataCollector import  DataCollector


In [13]:
class HistoricalDataCollector():

    ''' Class for collecting historical candle data from XTB

    Attrs
    ==================

    symbol - string
            ticker symbol e.g 'EURUSD'

    start - string
            start date in '%Y-%m-%d' format

    end - string
          end date in '%Y-%m-%d' format, today's date when not given

    period - PeriodCode
           candle period, a value from the PeriodCode class

    cols_to_save - list
                list of columns kept in the resulting frame ('Date' becomes the index)

    data - DataFrame or None
           frame produced by the last get_history_data() call

    max_range - dict
                period name -> maximum number of months of history that is requested

    possible_symbols - property
                value returned by DataCollector.get_symbol_strings() at construction

    '''
    def __init__(self, symbol, start, credentials_file, end=None, period = PeriodCode.PERIOD_MN1):
        self.dataCollector = DataCollector(credentials_file)
        self.symbol = symbol
        # Kept under a private name: assigning to `self.possible_symbols` directly used to
        # shadow the accessor of the same name and made it uncallable on instances.
        self._possible_symbols = self.dataCollector.get_symbol_strings()
        self.start = start
        self.end = end if end is not None else datetime.now().strftime('%Y-%m-%d')
        self.cols_to_save = ['Date', 'Open', 'Close', 'High', 'Low']
        self.data = None
        self.api_client = None
        # Keys are period *names*; check_max_range() resolves self.period to its name.
        self.max_range = {'PERIOD_M1': 1, 'PERIOD_M5': 1, 'PERIOD_M15': 1, 'PERIOD_M30': 6, 'PERIOD_H1': 6, 'PERIOD_H4': 12}
        self.period = period
        self.check_max_range()

    async def run(self):
        '''Connect through the DataCollector, keep its socket object and download the history.

        Returns the DataFrame produced by get_history_data().
        '''
        await self.dataCollector.connect()
        self.api_client = self.dataCollector.get_socket_object()
        return await self.get_history_data()

    def __repr__(self):
        rep = "{}(symbol = {}, start = {}, end = {}, period= {})"
        return rep.format(type(self).__name__, self.symbol, self.start, self.end, self.period)

    @property
    def possible_symbols(self):
        '''Symbols obtained from DataCollector.get_symbol_strings() at construction.'''
        return self._possible_symbols

    @possible_symbols.setter
    def possible_symbols(self, value):
        # Setter keeps plain attribute assignment working as it did before.
        self._possible_symbols = value

    @staticmethod
    def _to_millis(date_string):
        '''Convert a '%Y-%m-%d' string (interpreted as local midnight) to epoch milliseconds.'''
        return int(datetime.strptime(date_string, '%Y-%m-%d').timestamp() * 1000)

    async def get_history_data(self):
        ''' Collect and prepare the data

        Re-validates the date range, requests the chart range from the API client
        obtained in run() and stores the converted frame in self.data.
        '''

        # start/end may have been changed after construction, so validate again.
        self.check_max_range()

        end = self._to_millis(self.end)
        start = self._to_millis(self.start)

        history_data = await self.api_client.getChartRangeRequest(symbol= self.symbol, start = start, end = end, period= self.period, ticks= 0)
        df = self.history_converter(history_data)
        self.data = df
        return df

    def history_converter(self, history):
        '''Convert data from dict to pandas df

        Reads history['returnData']['rateInfos'] (one record per candle) and
        history['returnData']['digits']. 'open' is scaled by 10 ** digits, while
        'close', 'high' and 'low' are treated as offsets from 'open'.

        Returns a DataFrame restricted to self.cols_to_save and indexed by 'Date'.
        An empty 'rateInfos' yields an empty frame with the same layout.
        '''

        rate_infos = history['returnData']['rateInfos']
        digits = history['returnData']['digits']
        scale = 10 ** digits

        raw = pd.DataFrame.from_dict(rate_infos)
        if raw.empty:
            # No candles in the requested range: there is no 'ctm' column to convert.
            return pd.DataFrame(columns=self.cols_to_save).set_index("Date")

        open_price = raw['open'] / scale
        converted = raw.assign(
            # 'ctm' is in milliseconds; the result is a naive local-time datetime.
            Date=raw['ctm'].apply(lambda x: datetime.fromtimestamp(x / 1000)),
            Open=open_price,
            Close=open_price + raw['close'] / scale,
            High=open_price + raw['high'] / scale,
            Low=open_price + raw['low'] / scale,
        )

        # Non-mutating chain avoids set_index(inplace=True) on a column slice.
        return converted[self.cols_to_save].set_index("Date")

    def check_max_range(self):
        '''Check max range for given period and correct it if exceeded

        When the period has an entry in self.max_range and self.start lies more
        than that many whole months before now, self.start is moved to
        "now - max months". If self.end would then precede the new start,
        self.end is reset to today. Both corrections are reported with print().
        '''

        # self.max_range is keyed by period name; an enum member never equals a
        # string key, so resolve the member to its name (plain strings pass through).
        period_key = getattr(self.period, 'name', self.period)
        max_months = self.max_range.get(period_key)
        if max_months is None:
            return

        now = datetime.now()
        requested_start = datetime.strptime(self.start, '%Y-%m-%d')
        elapsed = relativedelta(now, requested_start)
        elapsed_months = elapsed.months + (elapsed.years * 12)

        if elapsed_months <= max_months:
            return

        print(f"Max range for given period {self.period} is {max_months} months from now")
        date_start = now + relativedelta(months=-max_months)
        if date_start > datetime.strptime(self.end, '%Y-%m-%d'):
            self.end = now.strftime('%Y-%m-%d')
            print(f"End date is set to {self.end}")

        self.start = date_start.strftime('%Y-%m-%d')
        print(f"Start date is set to {self.start}")


In [14]:
# Request daily (PERIOD_D1) EURUSD candles between 2023-01-01 and 2023-08-01.
hist_obj = HistoricalDataCollector(
    symbol='EURUSD',
    start='2023-01-01',
    credentials_file='credentials.json',
    end='2023-08-01',
    period=PeriodCode.PERIOD_D1,
)

Data was loaded from a CSV file to get symbols.


In [15]:
# run() is a coroutine: it connects, then downloads and converts the history.
# Top-level `await` relies on the notebook kernel's event loop (IPython autoawait).
response = await hist_obj.run()

In [16]:
# Display the frame returned by run(): Open/Close/High/Low columns indexed by Date.
response

Unnamed: 0_level_0,Open,Close,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-01-02,1.06922,1.06657,1.07100,1.06500
2023-01-03,1.06657,1.05470,1.06827,1.05191
2023-01-04,1.05470,1.06052,1.06349,1.05401
2023-01-05,1.06052,1.05206,1.06309,1.05145
2023-01-06,1.05203,1.06456,1.06477,1.04809
...,...,...,...,...
2023-07-27,1.10866,1.09767,1.11491,1.09654
2023-07-28,1.09769,1.10224,1.10470,1.09431
2023-07-30,1.10274,1.10171,1.10274,1.10110
2023-07-31,1.10173,1.09968,1.10454,1.09896
