In [25]:
import pandas as pd
import matplotlib.pyplot as plt
from numba import jit
from numba import float64
from numba import int64
import numpy as np

In [2]:
# Load the sample data set; the bar-construction loop further down reads its
# 'Date and Time', 'Price' and 'Volume' columns.
a = pd.read_csv('mlfinlab/data_structures/sample.csv')

In [3]:
@jit((float64[:], int64), nopython=True, nogil=True)
def ewma(arr_in, window):
    """Exponentially weighted moving average specified by a decay ``window``.

    Uses the "adjusted" form, which gives better results for the first few
    samples of a series:

        y[t] = (x[t] + (1-a)*x[t-1] + (1-a)^2*x[t-2] + ... + (1-a)^t*x[0]) /
               (1 + (1-a) + (1-a)^2 + ... + (1-a)^t)

    with ``a = 2 / (window + 1)``.

    Parameters
    ----------
    arr_in : np.ndarray, float64
        A single dimensional numpy array
    window : int64
        The decay window, or 'span'

    Returns
    -------
    np.ndarray
        The EWMA vector, same length / shape as ``arr_in``. An empty input
        yields an empty output.

    Examples
    --------
    >>> import pandas as pd
    >>> a = np.arange(5, dtype=float)
    >>> exp = pd.DataFrame(a).ewm(span=10, adjust=True).mean()
    >>> np.allclose(ewma(a, 10), exp.values.ravel())
    True
    """
    n = arr_in.shape[0]
    out = np.empty(n, dtype=float64)
    # Guard: compiled nopython code is not bounds-checked by default, so
    # touching arr_in[0] / out[0] on a zero-length array would read and write
    # past the end of the buffers.
    if n == 0:
        return out

    alpha = 2 / float(window + 1)
    decay = 1 - alpha

    # `w` is the running denominator (sum of weights); `ewma_old` is the
    # running numerator (weighted sum of the samples seen so far).
    w = 1.0
    ewma_old = arr_in[0]
    out[0] = ewma_old
    for i in range(1, n):
        w += decay ** i
        ewma_old = ewma_old * decay + arr_in[i]
        out[i] = ewma_old / w

    return out

In [22]:
# Build dollar-imbalance bars from the rows of `a`.
#
# For every row the signed dollar imbalance (price change * volume) is added to
# a running total. Once at least `imb_ewma_window` rows have been seen, a bar is
# closed when the absolute running total exceeds
# `exp_num_ticks * |EWMA of the last imb_ewma_window imbalances|`.
# After each bar, `exp_num_ticks` is re-estimated as the EWMA of the tick
# counts of all bars produced so far.

# Running state for the bar currently being built.
cum_dollar_imbalance = 0
cum_ticks = 0
prev_price = None
imb_flag = False

# Parameters.
exp_num_ticks = 10000  # initial guess; overwritten after the first bar
imb_ewma_window = 5
num_ticks_ticks_ewma_window = 400

# Accumulated results.
num_ticks_bar = []  # number of ticks in each completed bar
imb_array = []      # per-row dollar imbalance, in input order
idx = []            # 'Date and Time' value of the row that closed each bar

# Iterate over the three needed columns directly: DataFrame.iterrows() builds a
# full Series object for every row, which is needlessly slow on a long input.
for date_time, price, volume in zip(a['Date and Time'], a['Price'], a['Volume']):
    cum_ticks += 1

    # The first row has no predecessor, so it contributes no imbalance.
    price_diff = 0 if prev_price is None else price - prev_price
    prev_price = price

    dollar_imb = price_diff * volume
    cum_dollar_imbalance += dollar_imb
    imb_array.append(dollar_imb)

    # The threshold is only evaluated once a full EWMA window is available;
    # until then imb_flag keeps its previous value (False).
    if len(imb_array) >= imb_ewma_window:
        exp_num_tick_imb = ewma(np.array(imb_array[-imb_ewma_window:], dtype=float), window=imb_ewma_window)[-1]
        imb_flag = np.abs(cum_dollar_imbalance) > exp_num_ticks * np.abs(exp_num_tick_imb)

    if imb_flag:
        # Close the bar, update the expected bar length, and reset the
        # per-bar accumulators.
        num_ticks_bar.append(cum_ticks)
        exp_num_ticks = ewma(np.array(num_ticks_bar, dtype=float), num_ticks_ticks_ewma_window)[-1]
        cum_ticks = 0
        cum_dollar_imbalance = 0
        imb_flag = False
        idx.append(date_time)


KeyboardInterrupt: 

In [73]:
# Load the batch result file; the cells that follow inspect its `high` and
# `low` columns.
b = pd.read_csv('mlfinlab/data_structures/result_batch.csv')

In [74]:
# Distinct high-minus-low ranges that occur in `b`.
bar_range = b['high'] - b['low']
bar_range.unique()

array([1.5 , 0.5 , 0.75, 0.25, 1.  , 0.  , 1.25, 1.75, 2.  , 2.25, 2.5 ,
       2.75, 3.  , 3.25, 3.5 , 3.75])

In [66]:
# Distinct values found in the `low` column of `b`.
b['low'].unique()

array([1304.5 , 1304.25, 1304.75, 1304.  , 1303.75, 1303.5 , 1306.  ,
       1303.25, 1303.  , 1305.  , 1305.25, 1305.5 , 1305.75, 1302.75,
       1302.5 , 1302.25, 1302.  , 1301.75, 1301.5 , 1301.25, 1301.  ,
       1300.75, 1300.5 , 1300.25, 1300.  , 1299.75, 1299.5 , 1299.25,
       1299.  , 1298.75, 1298.5 , 1298.25, 1298.  , 1297.75, 1297.5 ,
       1297.25, 1297.  , 1296.75, 1296.5 , 1296.25, 1296.  , 1295.75,
       1295.5 , 1295.25, 1295.  , 1294.75, 1294.5 , 1294.25, 1294.  ,
       1293.75, 1293.5 , 1293.25, 1293.  , 1292.75, 1292.5 , 1292.25,
       1292.  , 1291.75, 1291.5 , 1291.25, 1291.  , 1290.75, 1290.5 ,
       1290.25, 1290.  , 1289.75, 1289.5 , 1289.25, 1289.  , 1288.75,
       1288.5 , 1288.25, 1288.  , 1287.75, 1287.5 , 1287.25, 1287.  ,
       1286.75, 1286.5 , 1286.25, 1286.  , 1285.75, 1285.5 , 1285.25,
       1285.  ])