In [103]:
import ira
import inspect
from dataclasses import dataclass
from typing import Union, List
from datetime import timedelta

from sklearn.model_selection import GridSearchCV
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.model_selection._search import BaseSearchCV
from sklearn.pipeline import make_pipeline, Pipeline

from qlearn.core.base import MarketDataComposer, MetaInfo
from qlearn.core.pickers import SingleInstrumentPicker

%alphalab dark
from alpha.utils.data_helpers import FriendlyFxLoader

In [32]:
# Load OHLC bars for EURUSD from the 'dukas @ 5Min' source. The result is
# indexed by symbol further down (data['EURUSD']).
# NOTE(review): presumably Dukascopy 5-minute bars — confirm against FriendlyFxLoader.
data = FriendlyFxLoader('dukas @ 5Min').ohlcs(['EURUSD'])

In [107]:
@dataclass
class MarketInfo:
    """
    Market metadata handed to estimators at fit time.

    Every attribute is an annotated dataclass field, so it can be passed to
    the constructor and takes part in ``repr`` / ``==``. The session bounds
    are declared after the tick dictionaries so that the positional order
    ``(symbols, column, timezone, tick_sizes, tick_prices)`` stays unchanged.
    """
    # instruments this info applies to (None is allowed)
    symbols: Union[List[str], None]
    # name of the price column the estimator works on (e.g. 'close')
    column: str
    timezone: str = 'UTC'
    # NOTE(review): presumably per-symbol mappings — confirm expected schema
    tick_sizes: Union[dict, None] = None
    tick_prices: Union[dict, None] = None
    # session bounds expressed as offsets from midnight
    session_start: timedelta = timedelta(hours=0, minutes=0)
    session_end: timedelta = timedelta(hours=23, minutes=59, seconds=58)

In [148]:
def pre_close_time_delta(freq):
    """
    Offset to subtract from a bar period to land just before the bar closes.

    Returns `_S1` when `freq` is strictly longer than `_S1`, otherwise one
    tenth of `freq`.
    NOTE(review): `_S1` is not defined in this notebook — presumably a
    one-second timedelta injected by the environment; confirm.
    """
    if freq > _S1:
        return _S1
    return freq / 10

def pre_close_time_shift(bars):
    """
    Time shift that moves a bar's timestamp to just before that bar's close.

    The bar period is inferred from the first 100 rows of `bars` and reduced
    by `pre_close_time_delta` of that period.
    """
    bar_period = pd.Timedelta(infer_series_frequency(bars[:100]))
    before_close = pre_close_time_delta(bar_period)
    return bar_period - before_close

def _predicted_column(obj):
    """
    Name of the price column `obj` predicts on, or None when it is unknown.

    An object exposing `metadata()` is asked first (the original contract);
    otherwise the `market_info_` attribute — the one `signal_generator`
    installs on decorated classes — is used.
    """
    if hasattr(obj, 'metadata'):
        return obj.metadata().column
    return getattr(getattr(obj, 'market_info_', None), 'column', None)


def predict(func):
    """
    Decorator for an estimator's predict method.

    Runs the wrapped method and, when the estimator works on 'close' prices
    and has no `exact_time` attribute, shifts the prediction's index forward
    by `pre_close_time_shift(xp)`.

    The wrapper deliberately keeps its own `__name__`: `signal_generator`
    looks methods up by `__name__ == 'predict'`, so a method that is already
    wrapped is not matched (and wrapped) a second time.
    """
    def wrapped_predict(obj, xp, *args, **kwargs):
        yh = func(obj, xp, *args, **kwargs)
        # any `exact_time` attribute, whatever its value, opts out of the shift
        if not hasattr(obj, 'exact_time') and _predicted_column(obj) == 'close':
            yh = yh.shift(1, freq=pre_close_time_shift(xp))

        return yh
    return wrapped_predict


def signal_generator(cls):
    """
    Class decorator marking `cls` as a qlearn signal generator.

    - the first member that is a function/method named 'predict' is replaced
      by its `predict(...)`-wrapped version;
    - `cls.__qlearn__` is set to '0.0.1';
    - `cls.market_info_` is initialised to None.

    Returns the same class object.
    """
    def _is_predict(member):
        is_routine = inspect.isfunction(member) or inspect.ismethod(member)
        return is_routine and member.__name__ == 'predict'

    matches = inspect.getmembers(cls, _is_predict)
    if matches:
        attr_name, original = matches[0]
        setattr(cls, attr_name, predict(original))

    cls.__qlearn__ = '0.0.1'
    cls.market_info_: MarketInfo = None
    return cls

In [242]:
class MidDayPrice(TransformerMixin):
    """
    Stateless transformer adding a `mid` column: the average of `high` and `low`.
    """

    def fit(self, x, y=None, **kwargs):
        # `y` must be optional: TransformerMixin.fit_transform calls
        # `self.fit(X, **fit_params)` with no target when y is None.
        return self

    def transform(self, x):
        # assign() returns a new frame; the input is left untouched
        return x.assign(mid = .5*(x.high+x.low))
    
class Ema(TransformerMixin, BaseEstimator):
    """
    Transformer adding an `MA<period>` column computed from the `close` column.

    NOTE(review): `apply_to_frame` and `ema` are not defined in this notebook —
    presumably an exponential moving average helper; confirm.
    """

    def __init__(self, period):
        self.period = period
        # marker attribute: `collect_estimators` only records estimators
        # that have `__qlearn__`
        self.__qlearn__ = 1

    def fit(self, x, y=None, **kwargs):
        # `y` must be optional: TransformerMixin.fit_transform calls
        # `self.fit(X, **fit_params)` with no target when y is None.
        return self

    def transform(self, x):
        return x.assign(**{f'MA{self.period}': apply_to_frame(ema, x.close, self.period)})

@signal_generator
class TesterSingle(BaseEstimator):
    """
    Toy signal generator used to exercise the pipeline plumbing.

    Emits +1 where the price is above its value `period` rows earlier and -1
    where it is below. The price column is taken from the `market_info_`
    object supplied as a fit parameter.
    """

    def __init__(self, period):
        self.period = period

    def fit(self, X, y=None, **fit_params):
        # `market_info_` is mandatory: a KeyError here means the fit parameter
        # was not routed to this step.
        self.market_info_ = fit_params['market_info_']
        return self

    def predict(self, X):
        price = X[self.market_info_.column]
        lagged = price.shift(self.period)
        # Setting `self.exact_time` (any value) would make the `predict`
        # wrapper skip its pre-close time shift.
        # NOTE(review): `srows` is not defined in this notebook — presumably it
        # stacks the two series into one ordered by index; confirm.
        return srows(
            pd.Series(+1, price[price > lagged].index),
            pd.Series(-1, price[price < lagged].index))


In [243]:
def scoring(est, x, y):
    """
    Placeholder scorer for GridSearchCV: every candidate gets a score of 0.

    The estimator and data are ignored; nothing is predicted.
    """
    constant_score = 0
    return constant_score

# Grid search over TesterSingle's `period` inside deliberately nested pipelines.
# The nesting exercises step-name routing: make_pipeline names the inner
# pipelines 'pipeline', 'pipeline-1' and 'pipeline-2'.
predictor = GridSearchCV(
    estimator=make_pipeline(
        MidDayPrice(),
        make_pipeline(
            MidDayPrice(),
            Ema(15),
            make_pipeline(Ema(6)),
            make_pipeline(Ema(11), TesterSingle(5)),
        ),
    ),
    param_grid={
        'pipeline__pipeline-2__testersingle__period': [5, 10],
    },
    scoring=scoring,
    cv=TimeSeriesSplit(5),
    verbose=True,
)

In [191]:
# Wrap the grid search in a MarketDataComposer using a SingleInstrumentPicker.
mdc = MarketDataComposer(predictor, SingleInstrumentPicker())

In [None]:
# Fit through the composer on the full `data` object, with no target (y=None).
# NOTE(review): this cell has no execution count — it was not run in the saved state.
mdc.fit(data, None) 

In [None]:
# Inspect the parameter names of the (unfitted) pipeline under the grid search;
# these '__'-joined names are what param_grid keys refer to.
predictor.estimator.get_params()

In [246]:
def collect_estimators(p, ex, step=None):
    """
    Walk `p` and append every qlearn-tagged estimator to `ex`.

    An estimator is recorded when it is a BaseEstimator that has a
    `__qlearn__` attribute; the search does not descend into it. Pipelines
    are traversed step by step, BaseSearchCV through `.estimator` and
    MarketDataComposer through `.predictor`.

    Each entry is a `(path, estimator)` tuple, where `path` is the step names
    joined by '__' (None for an estimator found at the root).

    `ex` is mutated in place and also returned.
    """
    if isinstance(p, BaseEstimator) and hasattr(p, '__qlearn__'):
        ex.append((step, p))
    elif isinstance(p, Pipeline):
        for name, member in p.steps:
            path = '__'.join((step, name)) if step else name
            collect_estimators(member, ex, path)
    elif isinstance(p, BaseSearchCV):
        # wrappers add no path component of their own
        collect_estimators(p.estimator, ex, step)
    elif isinstance(p, MarketDataComposer):
        collect_estimators(p.predictor, ex, step)
    return ex

In [248]:
# Collect (step path, estimator) pairs for every qlearn-tagged estimator
# reachable from the grid search, then display them.
elst = collect_estimators(predictor, list()) 
elst

[('pipeline__ema', Ema(period=15)),
 ('pipeline__pipeline-1__ema', Ema(period=6)),
 ('pipeline__pipeline-2__ema', Ema(period=11)),
 ('pipeline__pipeline-2__testersingle', TesterSingle(period=5))]

In [213]:
# Build the fit-parameter key that routes `market_info_` to the TesterSingle step.
# The step is selected by estimator type, not by position: `elst[0]` is simply
# the first tagged estimator found in the pipeline, which need not be the tester.
tester_step = next(step for step, est in elst if isinstance(est, TesterSingle))
{f'{tester_step}__market_info_': 123}

{'pipeline__pipeline-2__testersingle__market_info_': 123}

In [220]:
# Fit the grid search on EURUSD with a dummy all-zero target (`scoring` ignores it).
# `market_info_` (placeholder value 123) is routed to the TesterSingle step, which
# is located by estimator type: `elst[0]` is just the first tagged estimator in
# the pipeline, and sending the parameter there leaves TesterSingle.fit without it.
tester_step = next(step for step, est in elst if isinstance(est, TesterSingle))
predictor.fit(data['EURUSD'], pd.Series(0, data['EURUSD'].index), **{f'{tester_step}__market_info_': 123})

Fitting 5 folds for each of 2 candidates, totalling 10 fits


GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
             estimator=Pipeline(steps=[('middayprice',
                                        <__main__.MidDayPrice object at 0x7f6d10f917f0>),
                                       ('pipeline',
                                        Pipeline(steps=[('middayprice',
                                                         <__main__.MidDayPrice object at 0x7f6d10f91d68>),
                                                        ('ema', Ema(period=15)),
                                                        ('pipeline-1',
                                                         Pipeline(steps=[('ema',
                                                                          Ema(period=6))])),
                                                        ('pipeline-2',
                                                         Pipeline(steps=[('ema',
                                                         

In [233]:
# Check that the fit parameter reached the innermost estimator: take the last
# step of each nested pipeline (integer indexing on a Pipeline returns the
# estimator at that position) and read its `market_info_`.
predictor.best_estimator_[-1][-1][-1].market_info_

123

In [251]:
# scratch: small dict used below to check dict.pop semantics
a = dict(a=1, b=2)

In [253]:
# scratch: pop() removes the key from `a` and returns its value.
# Not idempotent — re-running this cell raises KeyError.
a.pop('a')

1

In [254]:
# scratch: show what is left in `a` after the pop above
a

{'b': 2}

In [257]:
# EURUSD bars only (the cells below use the `close` column)
d = data['EURUSD']

In [258]:
# Previous close minus current close; the first row has no previous bar and is NaN.
dp = d['close'].shift(periods=1) - d['close']

In [262]:
# Keep values whose magnitude is >= 0 and replace the rest with 0. With a
# threshold of 0 only NaN fails the comparison, so this zero-fills the NaNs.
dp.where(dp.abs() >= 0, other=0)

time
2014-01-01 22:00:00    0.000000
2014-01-01 22:05:00   -0.000255
2014-01-01 22:10:00    0.000320
2014-01-01 22:15:00   -0.000080
2014-01-01 22:20:00   -0.000790
                         ...   
2020-11-26 23:35:00    0.000045
2020-11-26 23:40:00   -0.000035
2020-11-26 23:45:00   -0.000050
2020-11-26 23:50:00   -0.000150
2020-11-26 23:55:00    0.000090
Name: close, Length: 515219, dtype: float64

In [269]:
# scratch: the ten multiples of 50 from 50 to 500
50*np.arange(1,11)

array([ 50, 100, 150, 200, 250, 300, 350, 400, 450, 500])