In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import requests
import json
import urllib
import httpx
from datetime import datetime

import sys
sys.path.append('../')
from config.tda.config import CONSUMER_KEY, REDIRECT_URI, JSON_PATH
from src.PaperTrader import PaperTrader
from src.TechAnalysis import TechAnalysis

from tda import auth, client
from tda.auth import easy_client
from tda.client import Client

token_path = JSON_PATH
api_key = CONSUMER_KEY
redirect_uri = REDIRECT_URI

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import f_regression, SelectKBest
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score
from xgboost import XGBClassifier
import pickle

In [2]:
"""
Login to TDAMERITRADE
"""
try:
    c = auth.client_from_token_file(token_path, api_key)
except:
    from selenium import webdriver
    with webdriver.Chrome('/home/daniel/chromedriver') as driver:
        c = auth.client_from_login_flow(
            driver, api_key, redirect_uri, token_path)

In [3]:
"""
Login/pull data from TDAMERTIRADE
"""
stock_ticker = 'VOO'

c = easy_client(
        api_key=api_key,
        redirect_uri=REDIRECT_URI,
        token_path=JSON_PATH)

resp = c.get_price_history(stock_ticker,
        period_type=Client.PriceHistory.PeriodType.YEAR,
        period=Client.PriceHistory.Period.TWENTY_YEARS,
        frequency_type=Client.PriceHistory.FrequencyType.DAILY,
        frequency=Client.PriceHistory.Frequency.DAILY)
assert resp.status_code == httpx.codes.OK

data = pd.DataFrame(c.get_price_history_every_minute(stock_ticker).json()['candles'])
data['target'] = data['close'].shift(-1)

In [4]:
"""
Time function
"""
def time_processing(df):
    dt = np.array(df['datetime'].values)/1000
    func = lambda x: datetime.fromtimestamp(x)
    funcvec = np.vectorize(func)
    dt = funcvec(dt)
    df['datetime'] = dt
    return df

Weights formula:  
$w_k = -w_{k-1} \frac{d - k + 1}{k}$

Weight converges to zero: 
$w_k \rightarrow 0$

In [5]:
"""
TechAnalysis tutorial
"""
column_name = 'close'
ta = TechAnalysis(data)
# data['ma'] = ta.moving_average(column_name, 5)
# data['rsi'] = ta.rsi(column_name, 10)
# data['macd'] = ta.macd(column_name, 10, 30)
# data['upper'], data['lower'] = ta.bollinger_bands(column_name, 10)
# ta.fib_retracement()

In [6]:
data

Unnamed: 0,open,high,low,close,volume,datetime,target
0,428.63,428.63,428.59,428.59,1300,1637658060000,428.54
1,428.54,428.54,428.54,428.54,100,1637658120000,428.41
2,428.46,428.46,428.41,428.41,597,1637658240000,428.30
3,428.34,428.34,428.30,428.30,300,1637658420000,428.32
4,428.30,428.32,428.30,428.32,300,1637658480000,428.23
...,...,...,...,...,...,...,...
16185,428.42,428.42,428.42,428.42,100,1641592440000,428.37
16186,428.37,428.37,428.37,428.37,100,1641592560000,428.45
16187,428.45,428.45,428.45,428.45,447,1641594120000,428.53
16188,428.53,428.53,428.53,428.53,206,1641594480000,428.58


In [7]:
def preprocessing(df, price_offset = 1.000001, prediction = False, best_d_value = 1):
    column_name = 'close'
    ta = TechAnalysis(df)
    """
    Imputes fractional differencing into data
    """
    if prediction:
        df = df[(len(df) - 65):]
        df_fd, weights = ta.frac_diff(df[column_name], best_d_value)
    else:
        df = time_processing(df)
        df_fd, weights, best_d_value = ta.fractional_difference(column_name, alpha=.05)
       
        
    df['frac_diff_cost'] = np.nan
    df['frac_diff_cost'].iloc[len(weights):] = df_fd[0]

    """
    Create mass features
    """
    steps = [5, 10, 20, 30, 40, 50]
    macds = [[2,10],[5,10],[10,20],[10,30],[20,30]]
    bbs_std = [1, 1.5, 2]

    for step in steps:
        df[f'ma_{step}'] = ta.moving_average(column_name, step)
        df[f'ewa_{step}'] = ta.moving_average(column_name, step, simple=False)
        df[f'rsi_{step}'] = ta.rsi(column_name, step)
        for std in bbs_std:
            df[f'bb_{step}_{std}_upper'],  df[f'bb_{step}_{std}_lower']= ta.bollinger_bands(column_name, step, std = std)


    for macd in macds:
        short, long = macd
        df[f'rsi_{step}'] = ta.macd(column_name, short, long)

    if not prediction:
        df['target_classifier'] = 0
        df['target_classifier'][df['target']>df['close']*price_offset] = 1
        df.reset_index(inplace=True, drop=True)
        df.drop(['datetime', 'target', 'open', 'high', 'low'], axis=1, inplace=True)
    
    df.dropna(inplace=True)
    
    return df, best_d_value

In [8]:
data, best_d_value = preprocessing(data, price_offset = 1.000001, prediction = False)

In [10]:
"""
test/train/ver split
"""
train_size = round(.9*len(data))

train = data[:train_size]
test = data[train_size:]

y_train = train.pop('target_classifier')
X_train = train
# X_train.drop('datetime', axis=1, inplace=True)

y_test = test.pop('target_classifier')
X_test = test

In [11]:
bestfeatures = SelectKBest(score_func=f_regression, k=10)
fit = bestfeatures.fit(X_train,y_train)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X_train.columns)
featureScores = pd.concat([dfcolumns, dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(10,'Score'))  #print 10 best features

              Specs     Score
23           rsi_20  5.272517
50           rsi_50  4.246009
2    frac_diff_cost  3.553181
10     bb_5_2_upper  2.849324
19    bb_10_2_upper  2.823238
8    bb_5_1.5_upper  2.750887
17  bb_10_1.5_upper  2.724885
6      bb_5_1_upper  2.653967
15    bb_10_1_upper  2.627751
3              ma_5  2.464915


In [11]:
# f_reg_largest = featureScores.nlargest(10,'Score')
# X_train_filtered = X_train[X_train.columns[f_reg_largest.index]]
# X_test_filtered = X_test[X_test.columns[f_reg_largest.index]]

In [12]:
"""
ML Pipeline
with Verification
"""
# pipe = make_pipeline(SelectKBest(score_func=f_regression, k=10), GradientBoostingClassifier())
# tscv = TimeSeriesSplit(n_splits=10)
# parameters = {
#     'gradientboostingclassifier__max_depth': range (2, 10, 1),
#     'gradientboostingclassifier__n_estimators': range(60, 220, 40),
#     'gradientboostingclassifier__learning_rate': [0.1, 0.01, 0.05]
# }
# clf = GridSearchCV(pipe, parameters, n_jobs=-1, cv=tscv, scoring = 'precision')
# # clf = GridSearchCV(pipe, parameters, n_jobs=-1, cv=tscv, scoring = 'roc_auc')
# clf.fit(X_train,y_train)
# clf.best_params_

# load
with open('model/model.pkl', 'rb') as f:
    clf = pickle.load(f)

In [13]:
clf.cv_results_
clf.best_score_

0.533905676520757

In [14]:
y_pred = clf.predict(X_train)
print(precision_score(y_train, y_pred), accuracy_score(y_train, y_pred))

y_pred = clf.predict(X_test)
print(precision_score(y_test, y_pred), accuracy_score(y_test, y_pred))

0.6018641810918774 0.5201571221831713
0.5416666666666666 0.5341191066997518


In [15]:
"""
Tutorial of PaperTrader Class
"""
# PT = PaperTrader(500)
# key = PT.buy('MSFT', 100, 3)
# print(PT.current_record())
# print(PT.current_free_cash())
# PT.sell(key, 102, 3)
# print(pd.DataFrame(PT.current_record()))
# print(PT.current_free_cash())

'\nTutorial of PaperTrader Class\n'

In [17]:
PT_test = PaperTrader(5000)
X_test_temp = X_test['close'].reset_index()
for i, pred in enumerate(y_pred):
    if pred:
        key = PT_test.buy('VOO', X_test_temp['close'][i], 1)
        try:
            PT_test.sell(key, X_test_temp['close'][i+1],1)
        except:
            pass
print(pd.DataFrame(PT_test.current_record()))
print(PT_test.current_free_cash())

     key ticker  buy_price  buy_amount  buy_total_amount             buy_time  \
0      0    VOO   439.7100           1          439.7100  2022-01-10 03:24:36   
1      1    VOO   439.3200           1          439.3200  2022-01-10 03:24:36   
2      2    VOO   439.3314           1          439.3314  2022-01-10 03:24:36   
3      3    VOO   438.9000           1          438.9000  2022-01-10 03:24:36   
4      4    VOO   438.8000           1          438.8000  2022-01-10 03:24:36   
..   ...    ...        ...         ...               ...                  ...   
115  115    VOO   428.5600           1          428.5600  2022-01-10 03:24:36   
116  116    VOO   428.5200           1          428.5200  2022-01-10 03:24:36   
117  117    VOO   428.4300           1          428.4300  2022-01-10 03:24:36   
118  118    VOO   428.4900           1          428.4900  2022-01-10 03:24:36   
119  119    VOO   428.4100           1          428.4100  2022-01-10 03:24:36   

     sell_price  sell_amoun

In [22]:
# save
# with open('model/model.pkl','wb') as f:
#     pickle.dump(clf,f)

In [None]:
pred_data, _ = preprocessing(data, price_offset = 1.000001, prediction = True, best_d_value = best_d_value)