In [1]:
from findataflow.dataprocs import feature_generator as fg
from findataflow.dataprocs import resample as resampler

In [1]:
import pandas as pd
import os

In [3]:
# Shared accumulator: each indicator cell below appends its feature-spec dicts here.
specs = list()

In [4]:
# BB
# Registers one spec per (parameter set, function) for the three BB outputs.
# NOTE(review): BB presumably means Bollinger Bands (timeperiod / nbdevup /
# nbdevdn arguments) -- confirm against feature_generator.
FUNCS = {
    'price': ['BULL_MA', 'BULL_UP', 'BULL_DOWN'],
}
# Same guard every other indicator cell uses: only known feature types allowed.
assert all(func_type in ['price', 'swig', 'vol'] for func_type in FUNCS.keys())
PARAMETER = [
    {
        'timeperiod': 5,
        'nbdevup': 2,
        'nbdevdn': 2
    }
]
specs += [
        {
            'symbol': 'TXF',
            'type': func_type,
            'freq': '1D',
            'func': func,
            'args': param
        }
        for param in PARAMETER
        for func_type, funcs in FUNCS.items()
        for func in funcs
    ]

In [5]:
# BIAS: one spec per look-back window, appended to the shared `specs` list.
FUNCS = {'swig': ['BIAS']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{'timeperiod': window} for window in (5, 10, 15, 20, 25, 30)]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [6]:
# candlestick: a single spec with no extra arguments.
FUNCS = {'price': ['candlestick']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{}]

specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [7]:
# CDP: five price-type outputs, none of which take extra arguments.
FUNCS = {'price': ['CDP', 'AH', 'AL', 'NH', 'NL']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{}]

specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [8]:
# DMI: six outputs, each registered once per look-back window.
FUNCS = {'swig': ['plusDM', 'minusDM', 'plusDI', 'minusDI', 'DX', 'ADX']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{'timeperiod': window} for window in (5, 10, 15, 20, 25, 30)]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [9]:
# KD: RSV / K / D outputs sharing one parameter set.
FUNCS = {'swig': ['RSV', 'K', 'D']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [
    {'fastk_period': 9, 'slowk_period': 3, 'slowd_period': 3},
]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [10]:
# MACD: line, signal and histogram outputs sharing one parameter set.
FUNCS = {'swig': ['MACD', 'MACD_SIGNAL', 'MACD_HIST']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [
    {'fastperiod': 12, 'slowperiod': 26, 'signalperiod': 9},
]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [11]:
# MFI: one spec per look-back window.
FUNCS = {'swig': ['MFI']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{'timeperiod': window} for window in (5, 10, 15, 20, 25, 30)]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [12]:
# OBV: the only 'vol'-type feature; takes no extra arguments.
FUNCS = {'vol': ['OBV']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{}]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [13]:
# RSI: one spec for each of three look-back windows.
FUNCS = {'swig': ['RSI']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{'timeperiod': window} for window in (5, 14, 25)]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [14]:
# SAR: a single spec with fixed acceleration / maximum arguments.
FUNCS = {'swig': ['SAR']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}

PARAMETER = [
    {'acceleration': 0.02, 'maximum': 0.2},
]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [15]:
# SMA: one price-type spec per look-back window.
FUNCS = {'price': ['SMA']}
# Every feature type must be one of the three known categories.
assert set(FUNCS) <= {'price', 'swig', 'vol'}
PARAMETER = [{'timeperiod': window} for window in (5, 10, 15, 20, 25, 30)]
specs.extend([
    {'symbol': 'TXF', 'type': kind, 'freq': '1D', 'func': name, 'args': kwargs}
    for kwargs in PARAMETER
    for kind, names in FUNCS.items()
    for name in names
])

In [34]:
def get_single_feature(spec):
    """Build the root feature described by one spec dict.

    Loads the OHLCV data for ``spec['symbol']`` at ``spec['freq']`` through the
    resampler, then pipes it into ``fg.generate_root_feature`` together with
    the whole spec.

    Args:
        spec: dict providing at least the 'symbol' and 'freq' keys; it is
            forwarded unchanged to the feature generator.

    Returns:
        Whatever ``fg.generate_root_feature`` returns for that data and spec.
    """
    ohlcv = resampler.get_OHLCV_given_frequency(spec['symbol'], spec['freq'])
    return ohlcv.pipe(fg.generate_root_feature, spec=spec)

from itertools import combinations


def _pairwise_diffs(df_features):
    """Return ``df_features`` plus one ``diff_{i}_{j}`` column per column pair.

    For every unordered pair ``(i, j)`` of the input columns (in
    ``itertools.combinations`` order) the column ``df[i] - df[j]`` is appended.
    All new columns are attached with a single ``pd.concat`` instead of being
    inserted one at a time, which avoids fragmenting the frame when the number
    of pairs is large. The input frame is not modified.
    """
    diff_columns = [
        (df_features[left] - df_features[right]).rename(f'diff_{left}_{right}')
        for left, right in combinations(df_features.columns, 2)
    ]
    return pd.concat([df_features, *diff_columns], axis=1)


def add_diff(df_features):
    """Return a new frame with pairwise column differences appended.

    Args:
        df_features: DataFrame whose columns are differenced pairwise.

    Returns:
        A new DataFrame holding the original columns followed by the
        ``diff_{i}_{j}`` columns; ``df_features`` itself is left untouched.
    """
    return _pairwise_diffs(df_features)


def get_featrues(specs, nlags=5, cache=False, add_diff=False):
    """Build the lagged feature matrix for a list of feature specs.

    Each spec is turned into a root feature by ``get_single_feature`` and then
    expanded by ``fg.extend_by_nlags``; the per-spec results are concatenated
    column-wise and every column is prefixed with the index of its spec.

    Args:
        specs: sequence of spec dicts, as accepted by ``get_single_feature``.
        nlags: value forwarded to ``fg.extend_by_nlags``; the column renaming
            assumes each spec contributes exactly ``nlags`` columns.
        cache: when True and ``X.csv`` exists in the working directory, that
            file is loaded and returned as-is; ``specs``, ``nlags`` and
            ``add_diff`` are ignored in that case.
        add_diff: when True, pairwise column differences are appended.

    Returns:
        DataFrame of features. On a cache hit the index is parsed as
        ``'%Y-%m-%d %H:%M:%S'`` timestamps.

    Side effects:
        Whenever the features are computed (no cache hit), the result is
        written to ``X.csv``, regardless of the ``cache`` flag.
    """
    cache_file = 'X.csv'
    if cache and os.path.exists(cache_file):
        df_features = pd.read_csv(cache_file, index_col=0)
        df_features.index = pd.to_datetime(df_features.index, format='%Y-%m-%d %H:%M:%S')
        return df_features

    df_features = pd.concat([
        get_single_feature(spec).pipe(fg.extend_by_nlags, spec='', nlags=nlags)
        for spec in specs
    ], axis=1)

    # Prefix each block of `nlags` columns with the position of its spec so the
    # columns of different specs cannot collide.
    df_features.columns = [
        f'{idx_spec}_{col}'
        for idx_spec in range(len(specs))
        for col in df_features.columns[idx_spec * nlags:(idx_spec + 1) * nlags]
    ]

    if add_diff:
        # The `add_diff` parameter shadows the module-level add_diff() function
        # inside this body, so calling `add_diff(...)` here would try to call a
        # bool. Go through the private helper instead.
        df_features = _pairwise_diffs(df_features)
    df_features.to_csv(cache_file, index=True)
    return df_features


# Correctly spelled alias; the original (misspelled) name is kept for callers.
get_features = get_featrues

In [32]:
if __name__ == '__main__':
    # Build the lagged feature matrix for every registered spec, without the
    # pairwise-difference columns.
    nlags = 5
    df_features = get_featrues(specs, nlags=nlags, add_diff=False)
    # Shape check left disabled, as in the original:
    # assert df_features.shape[1] == len(specs) * nlags
    



transformed OHLCV at frequency 1D
reading cache of TXF in 1D OHLCV
feature: 97367270 has generated.
feature 97367270_lag1 has generated.
feature 97367270_lag2 has generated.
feature 97367270_lag3 has generated.
feature 97367270_lag4 has generated.
transformed OHLCV at frequency 1D
reading cache of TXF in 1D OHLCV
feature: 68488648 has generated.
feature 68488648_lag1 has generated.
feature 68488648_lag2 has generated.
feature 68488648_lag3 has generated.
feature 68488648_lag4 has generated.
transformed OHLCV at frequency 1D
reading cache of TXF in 1D OHLCV
feature: 56494646 has generated.
feature 56494646_lag1 has generated.
feature 56494646_lag2 has generated.
feature 56494646_lag3 has generated.
feature 56494646_lag4 has generated.
transformed OHLCV at frequency 1D
reading cache of TXF in 1D OHLCV
feature: 62918036 has generated.
feature 62918036_lag1 has generated.
feature 62918036_lag2 has generated.
feature 62918036_lag3 has generated.
feature 62918036_lag4 has generated.
transfor

ipdb>  df_features.head()


                     0_97367270  0_97367270_lag1  0_97367270_lag2  \
2011-01-03 13:45:00         NaN              NaN              NaN   
2011-01-04 13:45:00         NaN              NaN              NaN   
2011-01-05 13:45:00         NaN              NaN              NaN   
2011-01-06 13:45:00         NaN              NaN              NaN   
2011-01-07 13:45:00      8891.6              NaN              NaN   

                     0_97367270_lag3  0_97367270_lag4   1_68488648  \
2011-01-03 13:45:00              NaN              NaN          NaN   
2011-01-04 13:45:00              NaN              NaN          NaN   
2011-01-05 13:45:00              NaN              NaN          NaN   
2011-01-06 13:45:00              NaN              NaN          NaN   
2011-01-07 13:45:00              NaN              NaN  9084.566733   

                     1_68488648_lag1  1_68488648_lag2  1_68488648_lag3  \
2011-01-03 13:45:00              NaN              NaN              NaN   
2011-01-04 13:45

ipdb>  q


BdbQuit: 