In [1]:
import sys
sys.path.append("../")

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
from tqdm import tqdm
import plotly.graph_objects as go
from plotting import CandlePlot
pd.set_option("display.max_columns", None)

In [3]:
class Data:
    
    def __init__(self, path):
        self.df = {
            'raw': pd.read_pickle(path)
        }
        if 'time' in self.df['raw'].columns:
            self.df['raw']['time'] = [ x.replace(tzinfo=None) for x in self.df['raw']['time']]
        self.datalen = self.df['raw'].shape[0]

    def __repr__(self) -> str:
        repr = str()
        for name, df in self.df.items():
            repr = repr + name + ':\n' + str(pd.concat([df.head(2), df.tail(1)])) + '\n'
        return repr

    def prep_data(self, name: str, rows: int, direction: int, source: str='raw', cols: list=None):
        '''Create new dataframe with specified list of columns and number of rows as preparation for fast data creation
        direction: 1 if data should be selected from top and -1 if from bottom
        '''
        assert (direction != 1 or direction != -1), 'direction must be 1 (top) or -1 (bottom)'
        
        if cols == None:
            cols = self.df[source].columns
        if direction == 1:
            self.df[name] = self.df[source][cols].iloc[:rows].copy()
        else:
            self.df[name] = self.df[source][cols].iloc[-rows:].copy()
        self.df[name].reset_index(drop=True, inplace=True)

    def add_columns(self, name: str, cols: list):
        '''Add new columns to component dataframes
        '''        
        exist_cols = list(self.df[name].columns)
        cols = exist_cols + cols
        self.df[name] = self.df[name].reindex(columns = cols) 

    def prepare_fast_data(self, name: str, rows: int, direction: int, source: str='raw', cols: list=None, add_cols: list=None):
        '''Prepare data as an array for fast processing
        fcols = {col1: col1_index, col2: col2_index, .... }     
        fastdf = [array[col1], array[col2], array[col3], .... ]
        Accessed by: self.fdata()
        '''
        self.prep_data(name=name, rows=rows, direction=direction, source=source, cols=cols)
        self.add_columns(name=name, cols=add_cols)

        self.fcols = dict()
        print(self.df[name].columns)
        for i in range(len(self.df[name].columns)):
            self.fcols[self.df[name].columns[i]] = i
        self.fastdf = [self.df[name][col].array for col in self.df[name].columns]
        self.fdatalen = len(self.fastdf[0])

    def fdata(self, column: str, index: int=None, forward: bool=False):
        if index is None:
            return self.fastdf[self.fcols[column]]
        else:
            if forward:
                return self.fastdf[self.fcols[column]][index:]
            else:
                return self.fastdf[self.fcols[column]][index]
        
    def update_fdata(self, column: str, index: int=None, value=None):
        assert value is not None, 'Value cannot be null'
        if index is None:
            self.fastdf[self.fcols[column]] = value
        else:
            self.fastdf[self.fcols[column]][index] = value

In [4]:
d = Data("../data/EUR_USD_M5.pkl")

In [5]:
def add_movement_cols(pips: int, count: int):
    cols = list()
    for i in range(count):
        cols += [f'move_{i+1}_pips', f'move_{i+1}_periods']
    return cols

In [6]:
MOVE_COUNT = 20
PIPS = 20

In [7]:
our_cols = ['time', 'mid_c']
# max = d.datalen
# d.shorten(name='analysis', rows=1000, direction=1, cols=our_cols)
d.prepare_fast_data(name='ana2', rows=1000, direction=1, cols=our_cols,
                    add_cols=['pip_returns'] + add_movement_cols(PIPS, MOVE_COUNT))
                    # add_cols=['yyy', 'jjj'])

Index(['time', 'mid_c', 'pip_returns', 'move_1_pips', 'move_1_periods',
       'move_2_pips', 'move_2_periods', 'move_3_pips', 'move_3_periods',
       'move_4_pips', 'move_4_periods', 'move_5_pips', 'move_5_periods',
       'move_6_pips', 'move_6_periods', 'move_7_pips', 'move_7_periods',
       'move_8_pips', 'move_8_periods', 'move_9_pips', 'move_9_periods',
       'move_10_pips', 'move_10_periods', 'move_11_pips', 'move_11_periods',
       'move_12_pips', 'move_12_periods', 'move_13_pips', 'move_13_periods',
       'move_14_pips', 'move_14_periods', 'move_15_pips', 'move_15_periods',
       'move_16_pips', 'move_16_periods', 'move_17_pips', 'move_17_periods',
       'move_18_pips', 'move_18_periods', 'move_19_pips', 'move_19_periods',
       'move_20_pips', 'move_20_periods'],
      dtype='object')


In [8]:
def calc_pip_returns(price: np.ndarray) -> np.ndarray:
    next_price = np.append(price[1:], np.nan)
    return (next_price - price) * pow(10, 4)

def calc_pip_returns(price: pd.Series) -> pd.Series:
    prev_price = price.shift(1)
    return (price - prev_price) * pow(10, 4)
# d.update_fdata('pip_returns', value=calc_pip_returns(d.fdata('mid_c')))
d.df['ana2']['pip_returns'] = calc_pip_returns(d.df['ana2']['mid_c'])

In [9]:
d.df['ana2'].head()

Unnamed: 0,time,mid_c,pip_returns,move_1_pips,move_1_periods,move_2_pips,move_2_periods,move_3_pips,move_3_periods,move_4_pips,move_4_periods,move_5_pips,move_5_periods,move_6_pips,move_6_periods,move_7_pips,move_7_periods,move_8_pips,move_8_periods,move_9_pips,move_9_periods,move_10_pips,move_10_periods,move_11_pips,move_11_periods,move_12_pips,move_12_periods,move_13_pips,move_13_periods,move_14_pips,move_14_periods,move_15_pips,move_15_periods,move_16_pips,move_16_periods,move_17_pips,move_17_periods,move_18_pips,move_18_periods,move_19_pips,move_19_periods,move_20_pips,move_20_periods
0,2016-01-07 00:00:00,1.07786,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-07 00:05:00,1.0781,2.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2016-01-07 00:10:00,1.07828,1.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2016-01-07 00:15:00,1.07798,-3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2016-01-07 00:20:00,1.0779,-0.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [10]:
# d.update_fdata('pip_returns', index=1, value=0)

In [11]:
d.df['ana2'].head()

Unnamed: 0,time,mid_c,pip_returns,move_1_pips,move_1_periods,move_2_pips,move_2_periods,move_3_pips,move_3_periods,move_4_pips,move_4_periods,move_5_pips,move_5_periods,move_6_pips,move_6_periods,move_7_pips,move_7_periods,move_8_pips,move_8_periods,move_9_pips,move_9_periods,move_10_pips,move_10_periods,move_11_pips,move_11_periods,move_12_pips,move_12_periods,move_13_pips,move_13_periods,move_14_pips,move_14_periods,move_15_pips,move_15_periods,move_16_pips,move_16_periods,move_17_pips,move_17_periods,move_18_pips,move_18_periods,move_19_pips,move_19_periods,move_20_pips,move_20_periods
0,2016-01-07 00:00:00,1.07786,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-07 00:05:00,1.0781,2.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2016-01-07 00:10:00,1.07828,1.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2016-01-07 00:15:00,1.07798,-3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2016-01-07 00:20:00,1.0779,-0.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
# d.fastdf

In [13]:
# d.update_fdata('pip_returns', index=2, value=10)

In [14]:
# d.fdata('pip_returns', 2)

In [15]:
d.df['ana2']

Unnamed: 0,time,mid_c,pip_returns,move_1_pips,move_1_periods,move_2_pips,move_2_periods,move_3_pips,move_3_periods,move_4_pips,move_4_periods,move_5_pips,move_5_periods,move_6_pips,move_6_periods,move_7_pips,move_7_periods,move_8_pips,move_8_periods,move_9_pips,move_9_periods,move_10_pips,move_10_periods,move_11_pips,move_11_periods,move_12_pips,move_12_periods,move_13_pips,move_13_periods,move_14_pips,move_14_periods,move_15_pips,move_15_periods,move_16_pips,move_16_periods,move_17_pips,move_17_periods,move_18_pips,move_18_periods,move_19_pips,move_19_periods,move_20_pips,move_20_periods
0,2016-01-07 00:00:00,1.07786,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-07 00:05:00,1.07810,2.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2016-01-07 00:10:00,1.07828,1.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2016-01-07 00:15:00,1.07798,-3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2016-01-07 00:20:00,1.07790,-0.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2016-01-12 10:55:00,1.08486,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
996,2016-01-12 11:00:00,1.08500,1.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
997,2016-01-12 11:05:00,1.08560,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
998,2016-01-12 11:10:00,1.08578,1.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [16]:
candles = d.fdatalen
for i in tqdm(range(10), desc=" Processing... "):
    pivot = i
    for j in range(MOVE_COUNT):
        close_price, forward_close_prices = d.fdata('mid_c', index=pivot), d.fdata('mid_c', index=pivot+1, forward=True)
        pip_returns = (forward_close_prices - close_price) * pow(10, 4)
        target = pip_returns > PIPS or pip_returns < PIPS
        pivot = np.where(target)[0][0]
        d.update_fdata(f'move_{j+1}_pips', i, pip_returns[pivot])
        d.update_fdata(f'move_{j+1}_periods', i, pivot + 1)
        pivot = i + pivot + 1 # Update pivot to close timestamp: Close means when PIPS target met
        



    

 Processing... : 100%|██████████| 10/10 [00:00<00:00, 371.36it/s]


In [17]:
d.df['ana2'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 43 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   time             1000 non-null   datetime64[ns]
 1   mid_c            1000 non-null   float64       
 2   pip_returns      999 non-null    float64       
 3   move_1_pips      10 non-null     float64       
 4   move_1_periods   10 non-null     float64       
 5   move_2_pips      10 non-null     float64       
 6   move_2_periods   10 non-null     float64       
 7   move_3_pips      10 non-null     float64       
 8   move_3_periods   10 non-null     float64       
 9   move_4_pips      10 non-null     float64       
 10  move_4_periods   10 non-null     float64       
 11  move_5_pips      10 non-null     float64       
 12  move_5_periods   10 non-null     float64       
 13  move_6_pips      10 non-null     float64       
 14  move_6_periods   10 non-null     float64 

In [19]:
d.df['ana2'].to_csv('D:/Trading/ml4t-data/test.csv')

In [None]:
close_price, forward_close_prices, pip_returns

In [None]:
target = pip_returns > PIPS or pip_returns < PIPS
target

In [None]:
pivot = np.where(target)[0][0]
pivot

In [None]:
# cp = CandlePlot(d.df['raw'][516800:519800], candles=True)
# cp.show_plot()

In [None]:
# Define two arrays
arr1 = np.array([3.14159, 2.71828, 1.61803])
arr2 = np.array([1.23456, 0.56789, 9.87654])

# Use the context manager for arr1
with np.printoptions(precision=1):
    print(arr1)  # This will print with precision 1

# This will print with the default precision
print(arr2)

In [None]:
arr1, arr2

In [None]:
shift = np.append(arr1[1:], np.nan)

In [None]:
shift

In [None]:
(shift - arr1) * pow(10, 4)

In [None]:
arr1[-4]

In [20]:
from numba import jit, cuda
import numpy as np
from timeit import default_timer as timer

def func(a):
    for i in range(10000000):
        a[i] += 1

@jit(target_backend='cuda')
def func2(a):
    for i in range(10000000):
        a[i] += 1

if __name__ == "__main__":
    n = 10000000
    a = np.ones(n, dtype=np.float64)
    start = timer()
    func(a)
    print("without GPU:", timer() - start)
    start = timer()
    func2(a)
    print("with GPU:", timer() - start)

without GPU: 3.0319362999871373
with GPU: 0.6702421999070793
