In [1]:
from time_series_transform.transform_core_api.time_series_transformer import Time_Series_Transformer
from time_series_transform.stock_transform.stock_transfromer import Stock_Transformer
import pandas as pd
import numpy as np
import pandas_ta as ta

# Machine Learning

This demo is going to train a machine learning model using sklearn, Stock_Transformer, and Time_Series_Transformer to predict the momentum of stock on daily basis. Subsequently, we will use the momentum to generate buy/sell signals and make a plot using Time_Series_Transformer.

Note: This tutorial is aim to demonstrate how to use time_series_transformer, instead of showing how to invest in stock

## Data Prep

We use yahoo api to fetch Google, NASDQ, and Gold data for past 3 years. In turn, there are various technical indicators generated such as Bollinger Bands, RSI, MACD, and Exponential Moving Average.

In [2]:
strategy = ta.Strategy(
    name= 'mystrategy',
    ta=[
        {"kind": "ema", "length": 50},
        {"kind": "ema", "length": 7},
        {"kind": "ema", "length": 20},
        {"kind": "bbands", "length": 20},
        {"kind": "bbands", "length": 50},
        {"kind": "bbands", "length": 30},
        {"kind": "rsi","prefix":"rsi"},
        {"kind": "macd", "fast": 8, "slow": 21},
    ]
)

In [3]:
st = Stock_Transformer.from_stock_engine_period(["GOOGL","NDAQ","GOLD"],'3y','yahoo')
st = st.get_technial_indicator(strategy)
st = st.dropna()

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    5.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    5.2s finished


Since we want to use Nasdaq and Gold as features, we have to expand it into columns. Then, we use make_lead features combining with the up_down_transform function to generate the pricing moment for the next time period.

In [4]:
def up_or_down(current,lead):
    if current < lead:
        return 'up'
    elif current > lead:
        return "down"
    else:
        return "unchange"
def up_down_transform(data):
    currentList = data['Close_GOOGL']
    leadList = data['Close_GOOGL_lead_1']
    res = []
    for c,l in zip(currentList,leadList):
        res.append(up_or_down(c,l))
    return res

In [21]:
df = st.to_pandas(expandCategory= True, expandTime = False)
tst = Time_Series_Transformer.from_pandas(df,'Date',None)
tst = tst.make_lead('Close_GOOGL',1,'_lead_')
tst = tst.transform(['Close_GOOGL','Close_GOOGL_lead_1'],'change',up_down_transform)
tst = tst.remove_feature("Close_GOOGL_lead_1")
tst = tst.make_label("change")
X,y = tst.to_pandas(sepLabel = True)

After generating X and y, we split 60 days of data as test data. time_series_transform also provide some sklearn transformers. For instance, Lag_Transformer is an implmentation of generating multiple lag features and can be combined with sklearn pipeline. It means that we can simply tune the lag number using cross validation.

In [22]:
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV,TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from time_series_transform.sklearn.transformer import Lag_Transformer

In [23]:
testX,testY = X.tail(60),y.tail(60)
trainX,trainY = X.drop(testX.index),y.drop(testY.index)

In [36]:
trainX.columns

Index(['Date', 'Open_GOOGL', 'High_GOOGL', 'Low_GOOGL', 'Close_GOOGL',
       'Volume_GOOGL', 'Dividends_GOOGL', 'Stock Splits_GOOGL', 'EMA_50_GOOGL',
       'EMA_7_GOOGL', 'EMA_20_GOOGL', 'BBL_20_2.0_GOOGL', 'BBM_20_2.0_GOOGL',
       'BBU_20_2.0_GOOGL', 'BBL_50_2.0_GOOGL', 'BBM_50_2.0_GOOGL',
       'BBU_50_2.0_GOOGL', 'BBL_30_2.0_GOOGL', 'BBM_30_2.0_GOOGL',
       'BBU_30_2.0_GOOGL', 'rsi_RSI_14_GOOGL', 'MACD_8_21_9_GOOGL',
       'MACDh_8_21_9_GOOGL', 'MACDs_8_21_9_GOOGL', 'Open_NDAQ', 'High_NDAQ',
       'Low_NDAQ', 'Close_NDAQ', 'Volume_NDAQ', 'Dividends_NDAQ',
       'Stock Splits_NDAQ', 'EMA_50_NDAQ', 'EMA_7_NDAQ', 'EMA_20_NDAQ',
       'BBL_20_2.0_NDAQ', 'BBM_20_2.0_NDAQ', 'BBU_20_2.0_NDAQ',
       'BBL_50_2.0_NDAQ', 'BBM_50_2.0_NDAQ', 'BBU_50_2.0_NDAQ',
       'BBL_30_2.0_NDAQ', 'BBM_30_2.0_NDAQ', 'BBU_30_2.0_NDAQ',
       'rsi_RSI_14_NDAQ', 'MACD_8_21_9_NDAQ', 'MACDh_8_21_9_NDAQ',
       'MACDs_8_21_9_NDAQ', 'Open_GOLD', 'High_GOLD', 'Low_GOLD', 'Close_GOLD',
       'Volume_

In [25]:
pip = Pipeline(
    [
        ("lag",Lag_Transformer(list(range(1,20)),time_col = 'Date')),
        ("impute",SimpleImputer(strategy = 'median')),
        ("pca",PCA()),
        ('rf',RandomForestClassifier())
     ]
)

In [26]:
searchParam = {
    "lag__lag_nums":[list(range(1,20)),list(range(1,50)),list(range(1,100))],
    "pca__n_components":list(range(2,30)),
    "rf__n_estimators":list(range(100,500)),
    "rf__min_samples_split":list(range(2,20))
}
randPip = RandomizedSearchCV(
    pip,
    searchParam,
    cv= TimeSeriesSplit(5),
    n_iter=30,
    n_jobs = 5
)

In [27]:
randPip = randPip.fit(trainX,trainY)
prd = randPip.predict(testX)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



In [30]:
print(classification_report(testY,prd))

              precision    recall  f1-score   support

        down       0.48      0.48      0.48        25
    unchange       0.00      0.00      0.00         1
          up       0.63      0.65      0.64        34

    accuracy                           0.57        60
   macro avg       0.37      0.38      0.37        60
weighted avg       0.56      0.57      0.56        60



In [31]:
confusion_matrix(testY,prd)

array([[12,  0, 13],
       [ 1,  0,  0],
       [12,  0, 22]], dtype=int64)

After we train the model, we predict the data. And the singal is generated by the following rules:
1. the fist period should be buy.
2. buy and then hold until sell
3. buy when the predict value is up
4. sell when the predict value is down

In [32]:
def buy_sell_signal(dataList):
    res = []
    tmp ='down'
    for i in dataList:
        if tmp == i or i == 'unchange':
            if i == 'unchange':
                res.append('hold')
                continue
            tmp = i
            res.append('hold')
        else:
            tmp = i
            res.append(i)
    return res

In [33]:
det = pd.DataFrame({'prd':buy_sell_signal(prd)})
det['Close'] = testX.Close_GOOGL.tolist()
det['Date'] = testX.Date.tolist()

In [34]:
det['Buy'] = det[['Close','prd']].apply(lambda x: x.Close if x.prd == 'up' else 0,axis =1)
det['Sell'] = det[['Close','prd']].apply(lambda x: x.Close if x.prd == 'down' else 0,axis =1)
buySignal = det[det.Buy > 0][['Date','Buy']]
sellSignal = det[det.Sell > 0][['Date','Sell']]

In [35]:
det_tst = Time_Series_Transformer.from_pandas(det,'Date',None)
det_tst.plot(["Close"],'info').add_marker(
    x = buySignal['Date'],
    y = buySignal['Buy'],color = 'green',
    legendName = 'Buy').add_marker(
    x = sellSignal['Date'],
    y = sellSignal['Sell'],color = 'red',
    legendName = 'Sell')



In [37]:
det.Sell.sum() -det.Buy.sum()

268.119873046875

# Deep Learning

In [1]:
import tensorflow as tf
from time_series_transform.transform_core_api.time_series_transformer import Time_Series_Transformer
from time_series_transform.stock_transform.stock_transfromer import Stock_Transformer
import pandas as pd
import numpy as np
import pandas_ta as ta
import tensorflow_io as tfio

In [2]:
def up_or_down(current,lead):
    if current < lead:
        return 'up'
    elif current > lead:
        return "down"
    else:
        return "unchange"
def up_down_transform(data):
    currentList = data['Close_GOOGL']
    leadList = data['Close_GOOGL_lead_1']
    res = []
    for c,l in zip(currentList,leadList):
        res.append(up_or_down(c,l))
    return res

In [3]:
strategy = ta.Strategy(
    name= 'mystrategy',
    ta=[
        {"kind": "ema", "length": 7},
        {"kind": "ema", "length": 20},
        {"kind": "ema", "length": 3},
        {"kind": "bbands", "length": 20},
        {"kind": "bbands", "length": 50},
        {"kind": "bbands", "length": 30},
        {"kind": "rsi","prefix":"rsi"},
        {"kind": "macd", "fast": 8, "slow": 21},
    ]
)

In [4]:
st = Stock_Transformer.from_stock_engine_period(["GOOGL","NDAQ","GOLD"],'3y','yahoo')
st = st.get_technial_indicator(strategy)
st = st.dropna()
df = st.to_pandas(expandCategory= True, expandTime = False,preprocessType = 'remove')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    4.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    4.5s finished


In [5]:
tst = Time_Series_Transformer.from_pandas(df,'Date',None)
tst = tst.make_lag_sequence(
    inputLabels = ["Close_GOLD","Close_GOOGL","Close_NDAQ"],
    windowSize = 10,
    lagNum =1 ,
    suffix = "_lag_seq_")
tst = tst.make_stack_sequence(
    inputLabels = ["Close_GOLD_lag_seq_10","Close_GOOGL_lag_seq_10","Close_NDAQ_lag_seq_10"],
    newName = "Close_seq",axis = -1
)

tst = tst.make_lead('Close_GOOGL',1,'_lead_')
tst = tst.transform(['Close_GOOGL','Close_GOOGL_lead_1'],'change',up_down_transform)
tst = tst.make_label("change")
tst = tst.remove_feature("Close_GOOGL_lead_1")
tst = tst.remove_feature("Close_GOLD")
tst = tst.remove_feature("Close_GOOGL")
tst = tst.remove_feature("Close_NDAQ")
tst = tst.remove_feature("Close_GOLD_lag_seq_10")
tst = tst.remove_feature("Close_GOOGL_lag_seq_10")
tst = tst.remove_feature("Close_NDAQ_lag_seq_10")

In [17]:
X,y = tst.to_pandas(sepLabel=True)
testX,testY = X.tail(60),y.tail(60)
trainX,trainY = X.drop(testX.index),y.drop(testY.index)

In [22]:
from tensorflow import keras
from tensorflow.keras import layers
model = keras.Sequential(
    [
        layers.LSTM(4),
        layers.Flatten(),
        layers.Dense(1, name="layer3"),
    ]
)

In [33]:
xxx = np.asarray(trainX['Close_seq'].values)
yyyy = trainX['Open_NDAQ']

In [31]:
model.compile()

In [32]:
model.fit(xxx,yyyy)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).

In [48]:
np.array([1,2,3,list([1,2,3]),list([1,2,3])],dtype=object)

array([1, 2, 3, list([1, 2, 3]), list([1, 2, 3])], dtype=object)

In [50]:
trainX['Close_seq'].to_numpy()

[[[nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan]],
 [[nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan]],
 [[nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan]],
 [[nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan]],
 [[nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan],
  [nan, nan, nan]],
 [[nan, nan, nan],
  [nan, nan, nan],
  [nan,

In [55]:
confusion_matrix(testY,prd)

array([[ 3,  0, 11],
       [ 0,  0,  1],
       [ 1,  0,  8]], dtype=int64)

In [56]:
det = pd.DataFrame({'prd':buy_sell_signal(prd)})
det['Close'] = testX.Close_GOOGL.tolist()
det['Datetime'] = testX.Datetime.tolist()

In [57]:
det['Buy'] = det[['Close','prd']].apply(lambda x: x.Close if x.prd == 'up' else 0,axis =1)
det['Sell'] = det[['Close','prd']].apply(lambda x: x.Close if x.prd == 'down' else 0,axis =1)
buySignal = det[det.Buy > 0][['Datetime','Buy']]
sellSignal = det[det.Sell > 0][['Datetime','Sell']]

In [58]:
det_tst = Time_Series_Transformer.from_pandas(det,'Datetime',None)
det_tst.plot(["Close"],'info').add_marker(
    x = buySignal['Datetime'],
    y = buySignal['Buy'],color = 'green',
    legendName = 'Buy').add_marker(
    x = sellSignal['Datetime'],
    y = sellSignal['Sell'],color = 'red',
    legendName = 'Sell')



In [48]:
import numpy as np

In [49]:
a = np.array([[1,1,1]])

In [53]:
np.stack([a,a,a],-1).shape

(1, 3, 3)