In [443]:
import pandas as pd
import numpy as np
import sys, os, re
import matplotlib.pyplot as plt

from importlib import reload

if re.match(r'^(\w\:\\)|(/)', os.getcwd()):
    sys.path.insert(0, "c:/Users/dmitr/devs/quantl/")
    sys.path.insert(0, "c:/Users/dmitr/devs/Qube2exp/src/")
    # sys.path.insert(0, "c:/Users/dmitr/devs/Qube/")
    print('installed')

%load_ext cython

T = pd.date_range('2024-01-01 00:00', freq='1Min', periods=100_000)
data = list(zip(T, 1 + (2*np.random.randn(len(T))).cumsum()))

installed
The cython extension is already loaded. To reload it, use:
  %reload_ext cython


# Experiment 2

In [452]:
%load_ext autoreload
%autoreload 2

from qube.utils import reload_pyx_module

# presumably reloads the .pyx modules found under this directory (the cell
# output lists cseries.pyx and cseries1.pyx) — TODO confirm against qube.utils
reload_pyx_module('../../src/qube/core/')
# The first import guarantees the 'qube.core.cseries1' key exists in
# sys.modules, so the `del` cannot raise KeyError; removing the entry makes the
# second import go through the import machinery again instead of reusing the
# cached module object.
from qube.core.cseries1 import TimeSeries
del sys.modules['qube.core.cseries1']
from qube.core.cseries1 import (time_to_str, recognize_time, TimeSeries, Ema, Sma)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
 > Reloaded ../../src/qube/core\cseries.pyx
 > Reloaded ../../src/qube/core\cseries1.pyx


## First Cython tests

### experimenting

In [267]:
%%cython

# - - - - - - - - - - - - - - - - - - - - - - - - - - - 
import numpy as np
cimport numpy as np
from typing_extensions import Union
cdef extern from "math.h":
    float INFINITY

from qube.core.cseries1 import (recognize_timeframe, time_to_str, recognize_time, time_delta_to_str)

cdef inline long long floor_t64(long long time, long long dt):
    """
    Round a timestamp down to a multiple of dt.
    """
    cdef long long remainder = time % dt
    return time - remainder

cdef nans(dims):
    """
    Return a float array of shape `dims` in which every element is NaN.
    """
    return np.full(dims, np.nan)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - 

cdef class Indexed:
    """
    Append-only container that is read newest-first.

    Items are stored in insertion order in a Python list, but indexing is
    reversed: index 0 is the most recently added item, index 1 the one before
    it, and so on. Negative indices count from the oldest item (-1 is the
    oldest). Slices are resolved element by element through the same mapping.

    max_series_length bounds the number of retained items; when it is
    exceeded the oldest item is dropped. The default (INFINITY) means
    unbounded.
    """
    cdef list values
    # double rather than C float: single precision cannot represent every
    # integer length above 2**24 exactly.
    cdef double max_series_length

    def __init__(self, max_series_length=INFINITY):
        self.max_series_length = max_series_length
        self.values = list()

    def __len__(self) -> int:
        return len(self.values)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            return [self.values[self._get_index(i)] for i in range(*idx.indices(len(self.values)))]
        return self.values[self._get_index(idx)]

    def _get_index(self, idx: int) -> int:
        """
        Translate a newest-first index into a position in the backing list.

        Raises IndexError when the container is empty or idx is out of range.
        """
        n_len = len(self)
        # Valid range is -n_len <= idx < n_len; an empty container has no
        # valid index at all.
        if not (-n_len <= idx < n_len):
            raise IndexError(f"Can't find record at index {idx}")
        return (n_len - idx - 1) if idx >= 0 else abs(1 + idx)

    def add(self, v):
        """
        Append v as the newest item, evicting the oldest one if the container
        would otherwise exceed max_series_length.
        """
        self.values.append(v)
        # Strictly greater: the previous `>=` evicted as soon as the length
        # *reached* the limit, so a container with limit N held only N-1 items
        # (and a limit of 1 left it permanently empty).
        if len(self.values) > self.max_series_length:
            self.values.pop(0)

    def update(self, v):
        """
        Overwrite the newest item with v, or add v if the container is empty.
        """
        if self.values:
            self.values[-1] = v
        else:
            # The class has no `append` method; `add` is the insertion entry point.
            self.add(v)

cdef class TimeSeries:
    """
    Series of values bucketed into fixed-width time bars.

    Every incoming (time, value) tick is floored to the start of its bar with
    floor_t64. A tick that belongs to the newest bar overwrites that bar's
    value; a tick at least one `timeframe` past the newest bar's start opens a
    new bar. Attached indicators are notified on every tick.

    `times` and `values` are Indexed containers, so index 0 is the newest bar.
    Times are presumably nanosecond timestamps (to_records and __str__ format
    them with the 'ns' unit) — TODO confirm against recognize_timeframe.
    """
    cdef public long long timeframe
    cdef public Indexed times
    cdef public Indexed values
    # double, not C float: `float` is single precision in Cython and silently
    # truncates both large lengths and stored values.
    cdef double max_series_length
    cdef unsigned short _is_new_item
    cdef str name
    cdef indicators

    def __init__(self, str name, timeframe, max_series_length=INFINITY) -> None:
        self.name = name
        self.max_series_length = max_series_length
        self.timeframe = recognize_timeframe(timeframe)
        self.times = Indexed(max_series_length)
        self.values = Indexed(max_series_length)
        self.indicators = list()

    def __len__(self) -> int:
        return len(self.times)

    def __getitem__(self, idx):
        return self.values[idx]

    def _add_new_item(self, long long time, double value):
        """Open a new bar and flag that the last update started one."""
        self.times.add(time)
        self.values.add(value)
        self._is_new_item = True

    def _update_last_item(self, long long time, double value):
        """Overwrite the newest bar and clear the new-bar flag."""
        self.times.update(time)
        self.values.update(value)
        self._is_new_item = False

    def update(self, long long time, double value) -> short:
        """
        Feed one tick into the series and propagate it to the indicators.

        Returns the new-bar flag: truthy when this tick opened a new bar.
        The very first tick always reports False.
        """
        item_start_time = floor_t64(time, self.timeframe)
        if not self.times:
            self._add_new_item(item_start_time, value)

            # Here we disable first notification because first item may be incomplete
            self._is_new_item = False
        elif time - self.times[0] >= self.timeframe:
            # First finalise the bar that is being closed: indicators get that
            # bar's own start time and last value with is_last_item=False.
            # Passing the new tick here (as before) made indicators overwrite
            # the closed bar's slot with the new bar's value.
            self._update_indicators(self.times[0], self.values[0], False)

            # then add new item
            self._add_new_item(item_start_time, value)
        else:
            self._update_last_item(item_start_time, value)

        # update indicators by new data
        self._update_indicators(item_start_time, value, True)

        return self._is_new_item

    cdef _update_indicators(self, long long time, value, short is_last_item):
        """Forward one (time, value, is_last_item) notification to every attached indicator."""
        for i in self.indicators:
            i.update(time, value, is_last_item)

    def to_records(self) -> dict:
        """Return {numpy datetime64[ns]: value}, built from the reversed (oldest-first) slices."""
        ts = [np.datetime64(t, 'ns') for t in self.times[::-1]]
        return dict(zip(ts, self.values[::-1]))

    def __str__(self):
        nl = len(self)
        lines = [f"{self.name}[{time_delta_to_str(self.timeframe)}] | {nl} records\n"]
        # Show up to 3 newest and 3 oldest bars; short series are printed whole.
        hd, tl = 3, 3
        if nl <= hd + tl:
            hd, tl = nl, 0

        for n in range(hd):
            lines.append(f"  {time_to_str(self.times[n], 'ns')} {str(self[n])}\n")

        if tl > 0:
            lines.append("   .......... \n")
            for n in range(-tl, 0):
                lines.append(f"  {time_to_str(self.times[n], 'ns')} {str(self[n])}\n")

        return "".join(lines)


cdef class Indicator(TimeSeries):
    """
    Base class for a series derived from another TimeSeries.

    Constructing an indicator registers it in the source series' indicator
    list; the source then calls update() for every tick. Subclasses implement
    calculate() and may override name().
    """
    def __init__(self, TimeSeries series):
        # NOTE(review): TimeSeries also declares a `cdef str name` attribute;
        # confirm that `self.name()` resolves to the method below.
        super().__init__(self.name(), series.timeframe, series.max_series_length)
        series.indicators.append(self)

    def name(self) -> str:
        return 'none'

    def update(self, long long time, value, short is_last_item) -> object:
        """
        Compute the indicator value for this notification and store it.

        When is_last_item is set and the indicator already has data, the
        newest item is overwritten; otherwise a new item is appended.
        Returns the computed value.
        """
        iv = self.calculate(time, value, is_last_item)
        if is_last_item and len(self) > 0:
            self._update_last_item(time, iv)
        else:
            self._add_new_item(time, iv)
        return iv

    def calculate(self, long long time, value, short is_last_item) -> object:
        """
        Return the indicator value for one notification.

        The signature matches the three-argument call made by update(); the
        base implementation has nothing to compute, so it fails explicitly
        instead of returning None into a float-typed slot.
        """
        raise NotImplementedError("Indicator subclasses must implement calculate()")


cdef class Sma(Indicator):
    """
    Simple moving average

    Keeps a ring buffer of `period` slots, each holding value / period, and
    returns the sum of the buffer. The buffer starts filled with NaN, so the
    result is NaN until every slot has been written.
    """
    cdef int period
    cdef np.ndarray __s
    cdef int __i

    def __init__(self, TimeSeries series, int period):
        if period <= 0:
            raise ValueError(f"period must be positive, got {period}")
        # period must be set before the base constructor runs, because the
        # base constructor calls name(), which formats self.period.
        self.period = period
        super().__init__(series)
        self.__s = nans(period)
        self.__i = 0

    def name(self) -> str:
        return f'sma{self.period}'

    cpdef calculate(self, long long time, value, short is_last_item):
        """
        Write value / period into the current slot and return the buffer sum.

        When is_last_item is false the slot is considered final and the write
        position advances (wrapping at `period`).
        """
        # double, not C float: single precision would truncate the stored term.
        cdef double _x = value / self.period
        self.__s[self.__i] = _x
        if not is_last_item:
            self.__i += 1
            if self.__i >= self.period:
                self.__i = 0
        return np.sum(self.__s)

### testing

In [None]:
# %%timeit
ts = TimeSeries('test', '10Min')
# (timestamp, value) ticks, fed in the order listed
for stamp, tick in [
    ('2024-01-01 00:00', 1),
    ('2024-01-01 00:01', 5),
    ('2024-01-01 00:06', 2),
    ('2024-01-01 00:12', 3),
    ('2024-01-01 00:21', 4),
    ('2024-01-01 00:22', 5),
    ('2024-01-01 00:31', 6),
    ('2024-01-01 00:33', 7),
    ('2024-01-01 00:45', -12),
    ('2024-01-01 00:55', 12),
    ('2024-01-01 01:00', 12),
]:
    ts.update(recognize_time(stamp), tick)
print(str(ts))

In [362]:
%%timeit
# Benchmark: build an hourly series with a 100-period SMA and EMA attached and
# feed it every (timestamp, value) pair from `data`.
# NOTE(review): names assigned inside a %%timeit cell may not be left in the
# notebook namespace — confirm `ts`, `s1`, `e1` exist for the cells below.
ts = TimeSeries('close', '1h')
s1 = Sma(ts, 100)
e1 = Ema(ts, 100)
# List comprehension used only for its side effects; the trailing `;`
# suppresses the cell output.
[ts.update(ti.asm8, vi) for ti, vi in data];

467 ms ± 11.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [358]:
# Convert the series and both indicators into pandas Series for plotting.
# x0: source series, x1: Sma, x2: Ema (as bound to ts / s1 / e1 in the kernel).
x0 = pd.Series(ts.to_records())
x1 = pd.Series(s1.to_records())
x2 = pd.Series(e1.to_records())

In [None]:
# Overlay the source series, both indicators and a pandas reference EMA.
# Labels + legend make the four lines distinguishable; the trailing `;`
# suppresses the Line2D / Legend repr in the cell output.
plt.plot(x0, label='close')
plt.plot(x1, label='sma(100)')
plt.plot(x2, label='ema(100)')
plt.plot(x0.ewm(span=100).mean(), label='pandas ewm(span=100)')
plt.legend();

In [455]:
# Minimal Ema scenario on a 1-minute series: two ticks with the same
# timestamp followed by one tick a minute later.
ts0 = TimeSeries('close', '1Min')
em0 = Ema(ts0, 5)

for t,v in [
    ('2024-01-01 00:00', 1),
    ('2024-01-01 00:00', 2),
    ('2024-01-01 00:01', 3),
    # ('2024-01-01 00:01', 4),
    # ('2024-01-01 00:02', 5),
]:
    ts0.update(recognize_time(t), v)

~~ UI ~~ TRUE 1.0  >  -> ret NAN |  [ 1. nan nan nan nan]
~~ UI ~~ TRUE 2.0  >  -> ret NAN |  [ 2. nan nan nan nan]
~~ UI ~~ FALSE 3.0 >  -> ret NAN |  [ 3. nan nan nan nan]


In [410]:
# Feed one more tick at 00:11; the cell displays update()'s return value.
ts0.update(recognize_time('2024-01-01 00:11'), 3)

 >in>  3.0
 >>>  2.1 [0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.3]
 >>> STANDARD 3.0 2.0999999046325684 2.263636290485209


1

## Tests of Cy module

In [50]:
%load_ext autoreload
%autoreload 2

from qube.utils import reload_pyx_module

reload_pyx_module('../../src/qube/core/')
from qube.core.cseries1 import TimeItem
del sys.modules['qube.core.cseries1']
from qube.core.cseries1 import (TimeItem, Float, TimeSeries, FloatSeries, Sma, time_to_str, Ema, 
                                recognize_time, time_to_str, time_delta_to_str)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
 > Reloaded ../../src/qube/core\cseries.pyx
 > Reloaded ../../src/qube/core\cseries1.pyx


In [None]:
# %%timeit
ts = FloatSeries('test1', '5Min')
# Each row is (timestamp, *Float arguments); the last two rows pass an extra
# third argument to Float.
for stamp, *payload in [
    ('2024-01-01 00:00', 1),
    ('2024-01-01 00:06', 2),
    ('2024-01-01 00:12', 3),
    ('2024-01-01 00:21', 4),
    ('2024-01-01 00:22', 5),
    ('2024-01-01 00:31', 6),
    ('2024-01-01 00:33', 7),
    ('2024-01-01 00:45', 12, 'test1'),
    ('2024-01-01 00:55', 12, 'test2'),
]:
    ts.update(Float(recognize_time(stamp), *payload))
print(ts)

In [None]:
# Reverse slice of the series; the resulting order depends on
# FloatSeries.__getitem__, which is defined outside this notebook.
ts[::-1]

In [8]:
# %%timeit
# sma(pd.Series(dict(zip(t, range(len(t))))), 25)

87.7 ms ± 897 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
# Seed the global NumPy RNG so the synthetic random walk is the same on
# every run of this cell.
np.random.seed(42)

# 100k one-minute timestamps paired with a Gaussian random walk starting near 1.
T = pd.date_range('2024-01-01 00:00', freq='1Min', periods=100_000)
data = list(zip(T, 1 + (2*np.random.randn(len(T))).cumsum()))

In [44]:
%%timeit
# Benchmark: hourly FloatSeries with a single Ema(100) attached, fed with
# every (timestamp, value) pair from `data`. The Sma indicators are disabled.
s0 = Ema(100)
# s1 = Sma(100)
# s2 = Sma(50)
fs = FloatSeries('close', '1h').attach(s0)#.attach(s1).attach(s2)
# List comprehension used only for its side effects; `;` suppresses output.
[fs.update(Float(ti.asm8, vi)) for ti, vi in data];

363 ms ± 5.05 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Build pandas Series from the FloatSeries and its indicators, then plot them.
# NOTE(review): `s1` and `s2` are only created in commented-out lines of the
# benchmark cell above, and `fs` / `s0` are assigned inside a %%timeit cell —
# confirm all four names exist in the kernel before running this cell.
v0 = pd.Series({np.datetime64(v.time, 'ns'): v.v for v in fs})
ma0 = pd.Series({np.datetime64(v.time, 'ns'): v.v for v in s0.values})
ma1 = pd.Series({np.datetime64(v.time, 'ns'): v.v for v in s1.values})
ma2 = pd.Series({np.datetime64(v.time, 'ns'): v.v for v in s2.values})
plt.plot(v0)
plt.plot(ma0, ls='--')
plt.plot(ma1)
plt.plot(ma2)

In [None]:
# Dump the values held by indicator `s1`.
# NOTE(review): `s1` is not assigned by any active line in the cells above.
print(s1.values)