In [268]:
%matplotlib inline
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from scipy.stats import norm

import sys
sys.path.append('../lib')
sys.path.append('../utilities')

from plot_utils import *
from retrieve_data import *
from ta_momentum_studies import *
from ta_volume_studies import *
from transformers import *
import talib as ta

In [343]:
def RSI(df, period=14):
    """Append a Relative Strength Index column computed with TA-Lib.

    The indicator is calculated from the ``Close`` column of ``df`` and
    written to a new column called ``RSI_<period>``.

    Side effects on module-level state:
        * ``feature_dict[<new column>]`` is set to ``'Keep'``
        * ``current_feature['Latest']`` is set to the new column name

    Args:
        df: DataFrame containing a ``Close`` column; it is modified in place.
        period: number of time periods handed to ``ta.RSI``.
    Return:
        The same DataFrame, now carrying the ``RSI_<period>`` column.
    """
    source_column = 'Close'
    feature_name = 'RSI_{}'.format(period)

    # Register the feature before computing it, as callers read
    # current_feature['Latest'] right after this function returns.
    feature_dict[feature_name] = 'Keep'
    current_feature['Latest'] = feature_name

    closing_prices = df[source_column]
    df[feature_name] = ta.RSI(closing_prices, period)
    return df

In [344]:
def UltOsc(df, t1=7, t2=14, t3=28):
    """Append an Ultimate Oscillator column computed with TA-Lib.

    The oscillator combines three timeframes (``t1``, ``t2``, ``t3``) and is
    calculated from the ``High``, ``Low`` and ``Close`` columns of ``df``.
    The result is stored in a column named
    ``UltOsc_t1<t1>_t2<t2>_t3<t3>``.

    Side effects on module-level state:
        * ``current_feature['Latest']`` is set to the new column name
        * ``feature_dict[<new column>]`` is set to ``'Keep'``

    Args:
        df: DataFrame with ``High``, ``Low`` and ``Close`` columns; it is
            modified in place.
        t1, t2, t3: the three time periods passed to ``ta.ULTOSC``,
                    default: 7, 14, 28
    Return:
        The same DataFrame, now carrying the oscillator column.
    """
    feature_name = 'UltOsc_t1{}_t2{}_t3{}'.format(t1, t2, t3)

    # Registration happens before the indicator is computed.
    current_feature['Latest'] = feature_name
    feature_dict[feature_name] = 'Keep'

    oscillator = ta.ULTOSC(df.High, df.Low, df.Close, t1, t2, t3)
    df[feature_name] = oscillator
    return df

In [363]:
def normalizer(dataSet, colname, n=50, mode = 'scale', linear = False):
    """Normalize a column against trailing quartile statistics.

    For every row ``i >= n`` the median (F50) and the quartiles (F25, F75)
    are taken from the ``n`` rows that precede it (row ``i`` itself is not
    part of its own window).  Rows ``i < n`` have no complete trailing
    window; they are normalized with the statistics of the first ``n`` rows
    (or of all rows when the series holds fewer than ``n``).

    The formulas follow "Statistically sound machine learning..."
    (Aronson and Masters); whether to apply the non-linear compression is
    left to the caller.  The compressed (non-linear) variants map onto the
    range -50 to 50.

    Side effects on module-level state:
        * ``feature_dict[<new column>]`` is set to ``'Keep'``
        * ``current_feature['Latest']`` is set to the new column name

    Args:
        dataSet: DataFrame containing the signal to be normalized; it is
                 modified in place.
        colname: name of the column to be normalized.
        n: number of data points used for the median and the quartiles.
        mode: 'scale': scale without centering.
              'total': center and scale.
              Any other value falls back to full normalization with
              compression.
        linear: True for linear scaling, False for non-linear (normal CDF)
                compression.
    Returns:
        dataSet, with the new column ``<colname>_Normalized`` added.
    """
    col_name = str(colname) + '_Normalized'
    feature_dict[col_name] = 'Keep'
    current_feature['Latest'] = col_name
    series = dataSet[colname]

    def _quartiles(window):
        # Series.quantile ignores NaN entries and yields NaN for an empty
        # or all-NaN window.
        return (window.quantile(0.25),
                window.quantile(0.5),
                window.quantile(0.75))

    # Seed the statistics from the first n rows so that the warm-up rows
    # (and series shorter than n) always have values to work with.
    F25, F50, F75 = _quartiles(series.iloc[:n])

    # Equality (not truthiness) is kept so that a non-boolean `linear`
    # still falls through to the default branch.
    is_linear = (linear == True)
    is_nonlinear = (linear == False)

    normalized = []
    for i in range(len(series)):
        if i >= n:
            # Explicitly positional trailing window [i - n, i).
            F25, F50, F75 = _quartiles(series.iloc[i - n:i])
        value = series.iloc[i]
        # A zero inter-quartile range yields inf/NaN from the division
        # below rather than an exception (numpy float semantics).
        iqr = F75 - F25
        if is_linear and mode == 'total':
            # NOTE(review): the linear variants are unbounded and carry a
            # -50 offset; kept as written, confirm against the reference.
            v = 50 * ((value - F50) / iqr) - 50
        elif is_linear and mode == 'scale':
            v = 25 * value / iqr - 50
        elif is_nonlinear and mode == 'scale':
            v = 100 * norm.cdf(0.5 * value / iqr) - 50
        else:
            # Even if strange values are given, perform full normalization
            # with compression as the default.  The CDF lies in [0, 1], so
            # it has to be stretched by 100 before being shifted by 50.
            v = 100 * norm.cdf(0.5 * (value - F50) / iqr) - 50
        normalized.append(v)

    dataSet[col_name] = normalized
    return dataSet

In [364]:
# Notebook setup: define the run parameters, create the shared feature
# registries and load the price data used by the cells below.
if __name__ == "__main__":

    # Date window applied to the loaded data and the issue to load.
    dataLoadStartDate = "2014-04-01"
    dataLoadEndDate = "2018-04-01"
    issue = "TLT"
    # Module-level registries written by RSI, UltOsc and normalizer:
    # feature_dict maps each generated column name to 'Keep', and
    # current_feature['Latest'] holds the most recently generated column.
    feature_dict = {}
    current_feature = {}

    # Project helper objects; not referenced by the cells visible here.
    # NOTE(review): presumably provided by the star imports at the top of
    # the notebook -- confirm which module defines each class.
    taLibVolSt = TALibVolumeStudies()
    taLibMomSt = TALibMomentumStudies()
    transf = Transformers()

    # Load the issue's data, then pass it through set_date_range with the
    # start/end dates defined above.
    dSet = DataRetrieve()
    df = dSet.read_issue_data(issue)
    df = dSet.set_date_range(df, dataLoadStartDate,dataLoadEndDate)

Successfully retrieved data series for TLT


In [365]:
def get_from_dict(dataDict, mapList):
    """Look up a value in a nested dictionary by following a key path.

    Args:
        dataDict: the (possibly nested) dictionary to search.
        mapList: iterable of keys, applied one level at a time; an empty
                 path returns ``dataDict`` itself.
    Returns:
        The value found at the end of the path.  ``None`` is returned when
        a key is missing, or when the path runs through a value that is not
        a dictionary.
    """
    # reduce is not a builtin on Python 3; importing it here keeps the
    # helper working regardless of what the notebook's star imports expose.
    from functools import reduce

    try:
        return reduce(dict.get, mapList, dataDict)
    except TypeError:
        # dict.get was applied to a non-dict (e.g. the None produced by a
        # missing intermediate key).
        return None  # or some other default value

In [370]:
# Example feature configuration.
# Every entry names the function to run ('fname', a key of funcDict) and the
# positional arguments for it ('params').  An optional 'transform' entry
# lists a follow-up function name followed by its extra arguments.
input_dict = {
    'f1': dict(fname='RSI', params=[10]),
    'f2': dict(fname='UltimateOscillator', params=[10, 20, 30]),
    'f3': dict(fname='UltimateOscillator', params=[],
               transform=['Normalized', 100]),
    'f4': dict(fname='RSI', params=[20],
               transform=['Normalized', 100]),
}

In [371]:
# Dispatch table: configuration name -> callable that produces the feature.
funcDict = dict(
    RSI=RSI,
    Normalized=normalizer,
    UltimateOscillator=UltOsc,
)

In [372]:
# Build every configured feature in order, then apply its optional transform
# to the column that was just generated.
for key in input_dict:
    path = [key, 'fname']
    func_name = get_from_dict(input_dict, path)
    print('fname: ', func_name)

    path = [key, 'params']
    params = get_from_dict(input_dict, path)
    print('params: ', params)
    df = funcDict[func_name](df, *params)
    print("Current feature: ", current_feature['Latest'])

    path = [key, 'transform']
    do_transform = get_from_dict(input_dict, path)
    print('transform: ', do_transform, '\n')

    if not do_transform:
        continue

    # First element names the transform; the rest are its extra arguments.
    transform_name = do_transform[0]
    print('!!!!', transform_name)
    pass_params = do_transform[1:]
    print("Current feature: ", current_feature['Latest'])
    df = funcDict[transform_name](df, current_feature['Latest'], *pass_params)
    print("Current feature: ", current_feature['Latest'])

fname:  RSI
params:  [10]
Current feature:  RSI_10
transform:  None 

fname:  UltimateOscillator
params:  [10, 20, 30]
Current feature:  UltOsc_t110_t220_t330
transform:  None 

fname:  UltimateOscillator
params:  []
Current feature:  UltOsc_t17_t214_t328
transform:  ['Normalized', 100] 

!!!! Normalized
Current feature:  UltOsc_t17_t214_t328
normalizer!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!



  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0


Current feature:  UltOsc_t17_t214_t328_Normalized
fname:  RSI
params:  [20]
Current feature:  RSI_20
transform:  ['Normalized', 100] 

!!!! Normalized
Current feature:  RSI_20
normalizer!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Current feature:  RSI_20_Normalized


In [374]:
# Show the first five rows, including the generated feature columns.
print(df.head())

           Symbol       Date   Close     High       Low    Open   Volume  \
2014-04-01    TLT 2014-04-01  107.89  108.360  107.8400  108.26  9067266   
2014-04-02    TLT 2014-04-02  107.27  107.550  107.1700  107.39  6887212   
2014-04-03    TLT 2014-04-03  107.74  107.935  107.4400  107.52  4937393   
2014-04-04    TLT 2014-04-04  108.46  108.690  107.8499  107.87  8977484   
2014-04-07    TLT 2014-04-07  109.11  109.290  108.6000  108.62  7562895   

               Pri  RSI_10  UltOsc_t110_t220_t330  UltOsc_t17_t214_t328  \
2014-04-01  107.89     NaN                    NaN                   NaN   
2014-04-02  107.27     NaN                    NaN                   NaN   
2014-04-03  107.74     NaN                    NaN                   NaN   
2014-04-04  108.46     NaN                    NaN                   NaN   
2014-04-07  109.11     NaN                    NaN                   NaN   

            UltOsc_t17_t214_t328_Normalized  RSI_20  RSI_20_Normalized  
2014-04-01         