In [1]:
import pandas as pd
import numpy as np
import requests
import json
import urllib
import httpx
from datetime import datetime

import sys
sys.path.append('../')
from config.tda.config import CONSUMER_KEY, REDIRECT_URI, JSON_PATH
from src.PaperTrader import PaperTrader
from src.TechAnalysis import TechAnalysis

from tda import auth, client
from tda.auth import easy_client
from tda.client import Client

token_path = JSON_PATH
api_key = CONSUMER_KEY
redirect_uri = REDIRECT_URI

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import f_regression, SelectKBest
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score

In [2]:
"""
Login to TDAMERITRADE
"""

try:
    c = auth.client_from_token_file(token_path, api_key)
except:
    from selenium import webdriver
    with webdriver.Chrome('/home/daniel/chromedriver') as driver:
        c = auth.client_from_login_flow(
            driver, api_key, redirect_uri, token_path)

In [3]:
"""
Login/pull data from TDAMERTIRADE
"""

stock_ticker = 'VOO'

c = easy_client(
        api_key=api_key,
        redirect_uri=REDIRECT_URI,
        token_path=JSON_PATH)

resp = c.get_price_history(stock_ticker,
        period_type=Client.PriceHistory.PeriodType.YEAR,
        period=Client.PriceHistory.Period.TWENTY_YEARS,
        frequency_type=Client.PriceHistory.FrequencyType.DAILY,
        frequency=Client.PriceHistory.Frequency.DAILY)
assert resp.status_code == httpx.codes.OK

data = pd.DataFrame(c.get_price_history_every_minute(stock_ticker).json()['candles'])
data['target'] = data['close'].shift(-1)

In [4]:
"""
Time function
"""
def time_processing(df):
    dt = np.array(df['datetime'].values)/1000
    func = lambda x: datetime.fromtimestamp(x)
    funcvec = np.vectorize(func)
    dt = funcvec(dt)
    df['datetime'] = dt
    return df

data = time_processing(data)

In [5]:
"""
TechAnalysis tutorial
"""
column_name = 'close'
ta = TechAnalysis(data)
# data['ma'] = ta.moving_average(column_name, 5)
# data['rsi'] = ta.rsi(column_name, 10)
# data['macd'] = ta.macd(column_name, 10, 30)
# data['upper'], data['lower'] = ta.bollinger_bands(column_name, 10)
# ta.fib_retracement()
print(data)

         open    high     low   close  volume            datetime  target
0      429.33  429.33  429.33  429.33     200 2021-11-16 04:11:00  429.54
1      429.54  429.54  429.54  429.54     100 2021-11-16 04:56:00  429.28
2      429.28  429.28  429.28  429.28     300 2021-11-16 05:33:00  429.29
3      429.27  429.29  429.27  429.29     200 2021-11-16 06:00:00  429.29
4      429.29  429.29  429.29  429.29     100 2021-11-16 06:04:00  429.29
...       ...     ...     ...     ...     ...                 ...     ...
16069  436.76  436.76  436.76  436.76     572 2021-12-31 16:59:00  437.17
16070  437.17  437.17  437.17  437.17     113 2021-12-31 17:18:00  437.07
16071  437.07  437.07  437.07  437.07     100 2021-12-31 17:51:00  437.06
16072  437.06  437.06  437.06  437.06     475 2021-12-31 18:02:00  437.05
16073  437.05  437.05  437.05  437.05     600 2021-12-31 18:04:00     NaN

[16074 rows x 7 columns]


In [30]:
"""
Create mass features
"""

data = pd.DataFrame(c.get_price_history_every_minute(stock_ticker).json()['candles'])
data['target'] = data['close'].shift(-1)
column_name = 'close'
ta = TechAnalysis(data)
price_offset=1.0001

steps = [5, 10, 20, 30, 40, 50]
macds = [[2,10],[5,10],[10,20],[10,30],[20,30]]
bbs_std = [1, 1.5, 2]

for step in steps:
    data[f'ma_{step}'] = ta.moving_average(column_name, step)
    data[f'ewa_{step}'] = ta.moving_average(column_name, step, simple=False)
    data[f'rsi_{step}'] = ta.rsi(column_name, step)
    for std in bbs_std:
        data[f'bb_{step}_{std}_upper'],  data[f'bb_{step}_{std}_lower']= ta.bollinger_bands(column_name, step, std = std)


for macd in macds:
    short, long = macd
    data[f'rsi_{step}'] = ta.macd(column_name, short, long)

data = time_processing(data)
data['target_classifier'] = 0
data['target_classifier'][data['target']>data['close']*price_offset] = 1
data.reset_index(inplace=True, drop=True)
data.drop(['datetime','target'], axis=1, inplace=True)
data.dropna(inplace=True)
# data[['close', 'target','target_classifier']]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [31]:
data['target_classifier'].sum()

5462

In [32]:
# data[['close','target','target_classifier']]

In [33]:
"""
test/train/ver split
"""


train_size = round(.9*len(data))

train = data[:train_size]
test = data[train_size:]

y_train = train.pop('target_classifier')
X_train = train
# X_train.drop('datetime', axis=1, inplace=True)

y_test = test.pop('target_classifier')
X_test = test


In [34]:
bestfeatures = SelectKBest(score_func=f_regression, k=10)
fit = bestfeatures.fit(X_train,y_train)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X_train.columns)
featureScores = pd.concat([dfcolumns, dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(10,'Score'))  #print 10 best features

              Specs       Score
31    bb_20_2_lower  134.858700
40    bb_30_2_lower  133.960627
49    bb_40_2_lower  133.701332
29  bb_20_1.5_lower  132.779300
22    bb_10_2_lower  131.991780
58    bb_50_2_lower  131.769967
38  bb_30_1.5_lower  131.349503
20  bb_10_1.5_lower  131.036227
47  bb_40_1.5_lower  130.613368
27    bb_20_1_lower  130.526850


In [35]:
f_reg_largest = featureScores.nlargest(10,'Score')
X_train_filtered = X_train[X_train.columns[f_reg_largest.index]]
X_test_filtered = X_test[X_test.columns[f_reg_largest.index]]

In [36]:
tscv = TimeSeriesSplit(n_splits=2)

parameters = {}
# gbr = GradientBoostingRegressor()
# lr = LinearRegression()
gbc = GradientBoostingClassifier()
clf = GridSearchCV(gbc, parameters, n_jobs=-1, cv=tscv)
clf.fit(X_train_filtered,y_train)
clf.best_params_

{}

In [37]:
clf.cv_results_
clf.best_score_

0.6261701685042647

In [38]:
y_pred = clf.predict(X_train_filtered)
print(precision_score(y_train, y_pred))

y_pred = clf.predict(X_test_filtered)
print(precision_score(y_test, y_pred))


0.9285714285714286
0.16666666666666666


In [39]:
"""
Tutorial of PaperTrader Class
"""
# PT = PaperTrader(500)
# key = PT.buy('MSFT', 100, 3)
# print(PT.current_record())
# print(PT.current_free_cash())
# PT.sell(key, 102, 3)
# print(pd.DataFrame(PT.current_record()))
# print(PT.current_free_cash())
        

'\nTutorial of PaperTrader Class\n'

In [40]:
print(X_test.head())
print(X_test_temp.head())
print(y_pred[:10])

           open    high      low    close  volume     ma_5       ewa_5  \
14472  438.3400  438.43  438.320  438.415    8413  438.228  438.365209   
14473  438.3801  438.41  438.318  438.380    8115  438.265  438.375070   
14474  438.3500  438.47  438.328  438.470    9453  438.351  438.438357   
14475  438.4500  438.46  438.390  438.400    6054  438.399  438.412786   
14476  438.4100  438.41  438.250  438.280    7681  438.389  438.324262   

           rsi_5  bb_5_1_upper  bb_5_1_lower  ...  bb_40_2_lower       ma_50  \
14472  67.567568    438.374995    438.081005  ...     438.044835  438.219464   
14473  66.371681    438.424374    438.105626  ...     438.074025  438.229664   
14474  93.000000    438.469343    438.232657  ...     438.077347  438.239302   
14475  76.666667    438.450039    438.347961  ...     438.084665  438.248502   
14476  43.750000    438.458498    438.319502  ...     438.094047  438.256902   

           ewa_50    rsi_50  bb_50_1_upper  bb_50_1_lower  bb_50_1.5_upper

In [41]:
PT_test = PaperTrader(5000)
X_test_temp = X_test['close'].reset_index()
for i, pred in enumerate(y_pred):
    if pred:
        key = PT_test.buy('VOO', X_test_temp['close'][i], 1)
        try:
            PT_test.sell(key, X_test_temp['close'][i+1],1)
        except:
            pass
print(pd.DataFrame(PT_test.current_record()))
print(PT_test.current_free_cash())

   key ticker  buy_price  buy_amount  buy_total_amount             buy_time  \
0    0    VOO    437.720           1           437.720  2022-01-03 02:26:11   
1    1    VOO    439.060           1           439.060  2022-01-03 02:26:11   
2    2    VOO    439.000           1           439.000  2022-01-03 02:26:11   
3    3    VOO    439.020           1           439.020  2022-01-03 02:26:11   
4    4    VOO    438.855           1           438.855  2022-01-03 02:26:11   
5    5    VOO    438.860           1           438.860  2022-01-03 02:26:11   

   sell_price  sell_amount  sell_total_amount            sell_time   open  \
0      437.57            1             437.57  2022-01-03 02:26:11  False   
1      439.00            1             439.00  2022-01-03 02:26:11  False   
2      439.02            1             439.02  2022-01-03 02:26:11  False   
3      439.09            1             439.09  2022-01-03 02:26:11  False   
4      438.86            1             438.86  2022-01-03 02:

In [None]:
X_test_temp