In [1]:
import sys
sys.path.append("../")

In [2]:
import pandas as pd
import datetime as dt
import math
import plotly.graph_objects as go
from plotting import CandlePlot
pd.set_option("display.max_columns", None)

In [3]:
class Data:
    '''Hold a raw dataframe loaded from disk plus named working copies of it.

    Attributes:
        df: dict mapping a name to a DataFrame. 'raw' is the frame read by
            __init__; other entries are created by shorten().
        fcols: dict of column name -> position, set by prepare_fast_data().
        fdata: list of per-column arrays, set by prepare_fast_data().
    '''

    def __init__(self, path):
        '''Read the pickled dataframe at `path` and strip tzinfo from 'time'.

        path: location of a pickle file that pd.read_pickle can load.
        '''
        # NOTE(security): unpickling can execute arbitrary code, so only load
        # files that come from a trusted source.
        self.df = {
            'raw': pd.read_pickle(path)
        }
        # Assumes every 'time' element is datetime-like (has .replace(tzinfo=...)).
        self.df['raw']['time'] = [x.replace(tzinfo=None) for x in self.df['raw']['time']]

    def __repr__(self) -> str:
        '''Return "<name>:" followed by the first three rows of every stored frame.'''
        return ''.join(
            name + ':\n' + str(frame.head(3)) + '\n'
            for name, frame in self.df.items()
        )

    def shorten(self, name: str, rows: int, direction: int, source: str='raw', cols: list=None):
        '''Create new dataframe with specified list of columns and number of rows

        name: key under which the new frame is stored in self.df
        rows: number of rows to keep
        direction: 1 if data should be selected from top and -1 if from bottom
        source: key of the frame in self.df to copy from
        cols: columns to keep; None keeps every column of the source frame

        Raises ValueError if direction is neither 1 nor -1.
        '''
        # The previous check (`!= 1 or != -1`) was always true and never fired.
        if direction not in (1, -1):
            raise ValueError('direction must be 1 (top) or -1 (bottom)')

        frame = self.df[source]
        if cols is not None:
            frame = frame[cols]

        # tail() is used instead of iloc[-rows:] because iloc[-0:] selects the
        # whole frame, whereas tail(0) correctly yields no rows.
        subset = frame.head(rows) if direction == 1 else frame.tail(rows)

        # reset_index returns a new frame, so the stored copy is independent
        # of the source frame.
        self.df[name] = subset.reset_index(drop=True)

    def add_columns(self, name: str, cols: list):
        '''Add new columns to component dataframes

        name: key of the frame in self.df to extend
        cols: column names to append; new columns are filled with NaN

        Names that already exist in the frame (or repeat inside `cols`) are
        skipped, so calling this twice with the same arguments does not
        create duplicate columns.
        '''
        frame = self.df[name]
        exist_cols = list(frame.columns)
        known = set(exist_cols)
        # dict.fromkeys drops repeats inside `cols` while preserving order.
        new_cols = [col for col in dict.fromkeys(cols) if col not in known]
        self.df[name] = frame.reindex(columns=exist_cols + new_cols)

    def prepare_fast_data(self, name: str):
        '''Prepare data as an array for fast processing
        fcols = {col1: col1_index, col2: col2_index, .... }
        fdata = [array[col1], array[col2], array[col3], .... ]
        Accessed by: self.fdata[fcols[column_name]] for whole column or
                     self.fdata[fcols[column_name]][row_index] for a specific row item
        '''
        frame = self.df[name]
        self.fcols = {col: position for position, col in enumerate(frame.columns)}
        self.fdata = [frame[col].array for col in frame.columns]

In [4]:
# Load the pickled dataframe into a Data container; it is stored as d.df['raw'].
d = Data("../data/EUR_USD_M5.pkl")

In [5]:
# Columns carried over from the raw frame into the working 'analysis' frame.
our_cols = ['time', 'mid_c']
# Keep every row of the raw frame. Named n_rows (not `max`) so the builtin
# max() is not shadowed for the rest of the kernel session.
n_rows = d.df['raw'].shape[0]
d.shorten(name='analysis', rows=n_rows, direction=1, cols=our_cols)

In [6]:
# Show the first three rows of every frame held in d (see Data.__repr__).
print(d)

raw:
                 time  volume    mid_o    mid_h    mid_l    mid_c    bid_o  \
0 2016-01-07 00:00:00      74  1.07764  1.07811  1.07759  1.07786  1.07757   
1 2016-01-07 00:05:00      98  1.07788  1.07818  1.07764  1.07810  1.07779   
2 2016-01-07 00:10:00      28  1.07812  1.07832  1.07812  1.07828  1.07803   

     bid_h    bid_l    bid_c    ask_o    ask_h    ask_l    ask_c  
0  1.07802  1.07750  1.07777  1.07772  1.07820  1.07768  1.07795  
1  1.07811  1.07755  1.07802  1.07798  1.07827  1.07772  1.07819  
2  1.07823  1.07803  1.07819  1.07822  1.07840  1.07822  1.07837  
analysis:
                 time    mid_c
0 2016-01-07 00:00:00  1.07786
1 2016-01-07 00:05:00  1.07810
2 2016-01-07 00:10:00  1.07828



In [7]:
# Reserve the two moving-average columns; they hold NaN until computed.
# The stray '' entry was removed: it created an all-NaN column with an empty
# name, which makes a later unscoped dropna() discard every row.
d.add_columns(name='analysis', cols=['ma_20', 'ma_50'])

In [13]:
# Compute 20- and 50-period simple moving averages of the mid close.
analysis = d.df['analysis']
analysis['ma_20'] = analysis['mid_c'].rolling(window=20).mean()
analysis['ma_50'] = analysis['mid_c'].rolling(window=50).mean()

# Drop the warm-up rows where a moving average is not yet defined. The drop is
# limited to the MA columns so an unrelated all-NaN column cannot wipe out the
# whole frame, and the result is re-assigned rather than mutated in place.
d.df['analysis'] = (
    analysis
    .dropna(subset=['ma_20', 'ma_50'])
    .reset_index(drop=True)
)

In [14]:
# Re-inspect the frames: 'analysis' should now include ma_20 and ma_50.
print(d)

raw:
                 time  volume    mid_o    mid_h    mid_l    mid_c    bid_o  \
0 2016-01-07 00:00:00      74  1.07764  1.07811  1.07759  1.07786  1.07757   
1 2016-01-07 00:05:00      98  1.07788  1.07818  1.07764  1.07810  1.07779   
2 2016-01-07 00:10:00      28  1.07812  1.07832  1.07812  1.07828  1.07803   

     bid_h    bid_l    bid_c    ask_o    ask_h    ask_l    ask_c  
0  1.07802  1.07750  1.07777  1.07772  1.07820  1.07768  1.07795  
1  1.07811  1.07755  1.07802  1.07798  1.07827  1.07772  1.07819  
2  1.07823  1.07803  1.07819  1.07822  1.07840  1.07822  1.07837  
analysis:
                 time    mid_c     ma_20     ma_50
0 2016-01-07 04:05:00  1.08168  1.081763  1.080100
1 2016-01-07 04:10:00  1.08164  1.081819  1.080175
2 2016-01-07 04:15:00  1.08170  1.081840  1.080247



In [15]:
# Build d.fcols (column name -> position) and d.fdata (one array per column)
# from the 'analysis' frame for index-based access.
d.prepare_fast_data('analysis')

In [16]:
# Column-name -> position lookup used to index into d.fdata.
d.fcols

{'time': 0, 'mid_c': 1, 'ma_20': 2, 'ma_50': 3}

In [17]:
# Preview the first three rows of the working frame.
d.df['analysis'].head(3)

Unnamed: 0,time,mid_c,ma_20,ma_50
0,2016-01-07 04:05:00,1.08168,1.081763,1.0801
1,2016-01-07 04:10:00,1.08164,1.081819,1.080175
2,2016-01-07 04:15:00,1.0817,1.08184,1.080247


In [None]:
candles = d.fdata[0].shape[0]
for i in range(candles):
    