In [1]:
from pykalman import KalmanFilter
from talib import SMA, EMA
import pandas as pd
import numpy as np
import datetime as dt
from tqdm import tqdm
import plotly.graph_objects as go
from plotting import CandlePlot
import pyarrow as pa
import pyarrow.parquet as pq
pd.set_option("display.max_columns", None)

In [2]:
class Data:
    '''Hold a raw DataFrame, named frames derived from it, and a per-column array view.

    self.df maps a frame name to a DataFrame; 'raw' is always the source data.
    self.datalen is the number of rows in the raw frame.
    self.fcols, self.fastdf and self.fdatalen are created by prepare_fast_data().
    '''

    def __init__(self, source):
        '''Load the raw frame from a pickle path (str) or from a copy of a DataFrame.
        Raises AssertionError for any other source type.
        '''
        assert isinstance(source, (str, pd.DataFrame)), 'Invalid source'
        if isinstance(source, str):
            # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
            raw = pd.read_pickle(source)
        else:
            # Copy so that later edits never touch the caller's frame.
            raw = source.copy()

        if 'time' in raw.columns:
            # Strip timezone info from every timestamp in the 'time' column.
            raw['time'] = [ts.replace(tzinfo=None) for ts in raw['time']]

        self.df = {'raw': raw}
        self.datalen = raw.shape[0]

    def __repr__(self) -> str:
        '''Show the first two rows and the last row of every stored frame, labelled by name.'''
        return ''.join(
            name + ':\n' + str(pd.concat([frame.head(2), frame.tail(1)])) + '\n'
            for name, frame in self.df.items()
        )

    def prep_data(self, name: str, rows: int, direction: int, source: str='raw', cols: list=None):
        '''Create new dataframe with specified list of columns and number of rows as preparation for fast data creation
        name: key under which the new frame is stored in self.df
        rows: number of rows to keep
        direction: 1 if data should be selected from top and -1 if from bottom
        source: key of the frame in self.df to select from
        cols: columns to keep; None keeps every column of the source frame
        Raises AssertionError if direction is neither 1 nor -1.
        '''
        assert direction in (1, -1), 'direction must be 1 (top) or -1 (bottom)'

        frame = self.df[source]
        if cols is not None:
            frame = frame[cols]
        # head/tail match iloc[:rows] / iloc[-rows:], except that tail(0) is empty
        # whereas iloc[-0:] would return the whole frame.
        selected = frame.head(rows) if direction == 1 else frame.tail(rows)
        self.df[name] = selected.copy().reset_index(drop=True)

    def add_columns(self, name: str, cols: list):
        '''Add new columns to component dataframes
        name: key of the frame in self.df to extend
        cols: column names to append; None adds nothing, and names that already
              exist are skipped so that no duplicate column labels are created
        '''
        if cols is None:
            return
        frame = self.df[name]
        merged = list(frame.columns)
        for col in cols:
            if col not in merged:
                merged.append(col)
        # reindex keeps existing columns and creates the new ones filled with NaN.
        self.df[name] = frame.reindex(columns=merged)

    def prepare_fast_data(self, name: str, rows: int, direction: int, source: str='raw', cols: list=None, add_cols: list=None):
        '''Prepare data as an array for fast processing
        fcols = {col1: col1_index, col2: col2_index, .... }     
        fastdf = [array[col1], array[col2], array[col3], .... ]
        Accessed by: self.fdata()
        add_cols: optional extra column names to create after the selection
        '''
        self.prep_data(name=name, rows=rows, direction=direction, source=source, cols=cols)
        self.add_columns(name=name, cols=add_cols)

        frame = self.df[name]
        self.fcols = {col: position for position, col in enumerate(frame.columns)}
        self.fastdf = [frame[col].array for col in frame.columns]
        self.fdatalen = len(frame)

    def fdata(self, column: str=None, index: int=None, rows: int=None):
        '''Read from the fast data prepared by prepare_fast_data().
        column is None       -> the whole list of column arrays
        index is None        -> the full array of `column`
        rows is None or 0    -> the single value at `index`
        otherwise            -> the slice [index:index+rows]
        '''
        if column is None:
            return self.fastdf
        values = self.fastdf[self.fcols[column]]
        if index is None:
            return values
        if rows:
            # Slices clamp at the end of the array, so a window that runs past
            # the last row simply comes back shorter.
            return values[index:index + rows]
        return values[index]

    def update_fdata(self, column: str, index: int=None, value: float=None):
        '''Write into one fast-data column.
        index is None: `value` must be a sequence with one entry per row; every row is overwritten
        otherwise:     `value` is stored at position `index`
        Raises AssertionError if value is None or a bulk value has the wrong length.
        NOTE(review): whether these writes also show up in self.df[name] depends on
        pandas' copy semantics for Series.array - confirm before relying on it.
        '''
        assert value is not None, 'Value cannot be null'
        if index is None:
            assert len(value) == self.fdatalen, 'value must have one entry per fast-data row'
            target = self.fastdf[self.fcols[column]]
            for i in range(self.fdatalen):
                target[i] = value[i]
        else:
            self.fastdf[self.fcols[column]][index] = value


In [3]:
# Load the pickled frame from the relative data directory into `df`.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle("../data/EUR_USD_M5.pkl")

In [6]:
# Select a four-column subset of the loaded frame.
# NOTE(review): this cell's execution count (6) is higher than that of the cells
# below it (4, 5), so it was run out of order; `df2` is not used by any cell
# visible here - confirm it is still needed.
our_cols = ['time', 'bid_o', 'ask_o', 'mid_c']
df2 = df[our_cols]

In [4]:
# Wrap the loaded frame: Data stores a copy under d.df['raw'] and strips
# timezone info from its 'time' column.
d = Data(df)

In [5]:
# Display via Data.__repr__: first two rows and last row of each stored frame.
d

raw:
                      time  volume    mid_o    mid_h    mid_l    mid_c  \
0      2016-01-07 00:00:00      74  1.07764  1.07811  1.07759  1.07786   
1      2016-01-07 00:05:00      98  1.07788  1.07818  1.07764  1.07810   
594302 2023-12-29 21:55:00     104  1.10392  1.10397  1.10366  1.10374   

          bid_o    bid_h    bid_l    bid_c    ask_o    ask_h    ask_l    ask_c  
0       1.07757  1.07802  1.07750  1.07777  1.07772  1.07820  1.07768  1.07795  
1       1.07779  1.07811  1.07755  1.07802  1.07798  1.07827  1.07772  1.07819  
594302  1.10382  1.10388  1.10346  1.10363  1.10401  1.10406  1.10381  1.10386  