In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import requests
import json
import urllib
import httpx
from datetime import datetime
from time import sleep

import sys
sys.path.append('../')
from config.tda.config import JSON_PATH, CONSUMER_KEY, REDIRECT_URI, WEBDRIVER, tda_login
from src.PaperTrader import PaperTrader
from src.TechAnalysis import TechAnalysis

from tda import auth, client, orders
from tda.auth import easy_client
from tda.client import Client

from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import f_regression, SelectKBest
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score
from xgboost import XGBClassifier
import pickle


# Brokerage account that AI_trade() places its orders against.
# NOTE(review): this looks like a live account number hard-coded in the notebook —
# consider loading it from the untracked config module or an environment variable.
account_id = 455003338

In [3]:
"""
Login to TDAMERITRADE
"""
# `tda_login` comes from the project-local config module; the object it returns is the
# client (`c`) used by every price-history pull and order call further down the notebook.
c = tda_login(JSON_PATH, CONSUMER_KEY, REDIRECT_URI, WEBDRIVER)

In [5]:
"""
Pull minute-level price history from TD Ameritrade
"""
stock_ticker = 'VOO'

# One row per candle returned by the price-history endpoint.
data = pd.DataFrame(c.get_price_history_every_minute(stock_ticker).json()['candles'])
# Label each row with the *next* row's close; the final row has no successor and gets NaN.
data['target'] = data['close'].shift(-1)

In [6]:
def time_processing(df):
    """
    Convert the epoch-millisecond `datetime` column of TDA candle data to datetimes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a `datetime` column holding epoch timestamps in milliseconds.

    Returns
    -------
    numpy.ndarray
        One-dimensional object array of naive `datetime.datetime` values, one per
        row, expressed in the machine's local timezone (the behaviour of
        `datetime.fromtimestamp`).  Empty when `df` has no rows.
    """
    seconds = np.array(df['datetime'].values) / 1000
    # Build the object array explicitly: np.vectorize without `otypes` raises
    # ValueError on size-0 input, which made an empty frame crash here.
    return np.array([datetime.fromtimestamp(s) for s in seconds], dtype=object)

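Fractional-differencing weights for differencing order $d$, starting from $w_0 = 1$:  
$w_k = -w_{k-1} \frac{d - k + 1}{k}$

The weights converge to zero as $k$ grows:  
$w_k \rightarrow 0 \text{ as } k \rightarrow \infty$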

In [7]:
"""
TechAnalysis tutorial
"""
# Wrap the raw candles in the project's TechAnalysis helper.
column_name = 'close'
ta = TechAnalysis(data)
# Example indicator calls, left commented out. preprocessing() below builds its own
# TechAnalysis instance and computes the features that are actually used for modelling.
# data['ma'] = ta.moving_average(column_name, 5)
# data['rsi'] = ta.rsi(column_name, 10)
# data['macd'] = ta.macd(column_name, 10, 30)
# data['upper'], data['lower'] = ta.bollinger_bands(column_name, 10)
# ta.fib_retracement()

In [8]:
def preprocessing(df, price_offset = 1.000001, prediction = False, best_d_value = 1, len_of_original_weights = 64):
    """
    Turn raw TDA candles into the feature matrix used for training or live prediction.

    Adds a fractionally differenced close price and a grid of moving-average, RSI,
    Bollinger-band and MACD features; in training mode it also builds the binary
    target.  The frame passed in may be modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Candle data with `datetime`, `open`, `high`, `low` and `close` columns,
        plus `target` when `prediction` is False.
    price_offset : float
        A row is labelled 1 when `target > close * price_offset`, otherwise 0.
    prediction : bool
        False: let `fractional_difference` pick the differencing order and build
        the target column.  True: apply `best_d_value` to only the trailing
        `len_of_original_weights + 1` rows and build no target.
    best_d_value : float
        Differencing order applied in prediction mode; replaced by the fitted
        value in training mode.
    len_of_original_weights : int
        Number of fractional-difference weights obtained at training time; only
        used in prediction mode to size the trailing window.

    Returns
    -------
    tuple
        `(df, best_d_value, weights)`: the feature frame with every row that
        contains a NaN dropped, the differencing order used, and the
        fractional-difference weights.
    """
    column_name = 'close'
    # The helper is built on the frame as passed in, i.e. before the
    # prediction-mode trim below.
    ta = TechAnalysis(df)

    # Fractional differencing of the close price.
    if prediction:
        df = df[(len(df) - (len_of_original_weights+1)):]
        df_fd, weights = ta.frac_diff(df[column_name], best_d_value)
    else:
        df_fd, weights, best_d_value = ta.fractional_difference(column_name, alpha=.05)

    df['datetime'] = time_processing(df)
    # Rows before position len(weights) get no differenced value and stay NaN.
    df['frac_diff_cost'] = np.nan
    # NOTE(review): chained assignment, kept unchanged because the type of
    # `df_fd[0]` is not visible here. It relies on pandas writing through to
    # `df` and will silently do nothing under copy-on-write — confirm against
    # the pandas version in use.
    df['frac_diff_cost'].iloc[len(weights):] = df_fd[0]

    # Feature grid: window lengths, MACD (short, long) pairs, Bollinger widths.
    steps = [5, 10, 20, 30, 40, 50]
    macds = [[2,10],[5,10],[10,20],[10,30],[20,30]]
    bbs_std = [1, 1.5, 2]

    for step in steps:
        df[f'ma_{step}'] = ta.moving_average(column_name, step)
        df[f'ewa_{step}'] = ta.moving_average(column_name, step, simple=False)
        df[f'rsi_{step}'] = ta.rsi(column_name, step)
        for std in bbs_std:
            df[f'bb_{step}_{std}_upper'], df[f'bb_{step}_{std}_lower'] = ta.bollinger_bands(column_name, step, std = std)

    for short, long in macds:
        # Name each MACD column after its own windows so it neither collides
        # with an RSI column nor overwrites the previous pair.
        df[f'macd_{short}_{long}'] = ta.macd(column_name, short, long)

    if not prediction:
        # Single vectorised assignment; a NaN target compares False and is labelled 0.
        df['target_classifier'] = (df['target'] > df['close'] * price_offset).astype(int)
        df.reset_index(inplace=True, drop=True)
        df.drop(['target'], axis=1, inplace=True)

    df.drop(['datetime', 'open', 'high', 'low'], axis=1, inplace=True)
    # Drops the warm-up rows where a rolling feature is still NaN.
    df.dropna(inplace=True)

    return df, best_d_value, weights

In [9]:
# Build the training feature matrix and keep the fitted differencing order and weights
# for the prediction-mode calls later in the notebook.
# NOTE: not re-runnable as-is — preprocessing() drops the `datetime` column from `data`,
# and it reads that column on entry, so a second run needs the data-pull cell re-run first.
data, best_d_value, weights = preprocessing(data, price_offset = 1.000001)

In [10]:
"""
Chronological train/test split (first 90% of rows train, last 10% test)
"""
# Rows are in time order, so slicing keeps the test set strictly after the training set.
train_size = round(.9*len(data))

train = data[:train_size]
test = data[train_size:]

# pop() removes the label column from the slice, so `train` and `X_train` are the same
# object and hold features only from here on.
y_train = train.pop('target_classifier')
X_train = train
# X_train.drop('datetime', axis=1, inplace=True)

# Same treatment for the held-out rows.
y_test = test.pop('target_classifier')
X_test = test

In [11]:
# Score every feature against the label with the univariate F-test and list the ten best.
bestfeatures = SelectKBest(score_func=f_regression, k=10)
fit = bestfeatures.fit(X_train,y_train)
# One-column frames of scores and feature names, in matching order.
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X_train.columns)
featureScores = pd.concat([dfcolumns, dfscores],axis=1)
featureScores.columns = ['Specs','Score']  # name the two columns
print(featureScores.nlargest(10,'Score'))  # print the 10 best features

              Specs     Score
2    frac_diff_cost  5.160720
19    bb_10_2_upper  4.867168
10     bb_5_2_upper  4.799773
17  bb_10_1.5_upper  4.794945
8    bb_5_1.5_upper  4.741877
15    bb_10_1_upper  4.722266
6      bb_5_1_upper  4.683720
56    bb_50_2_lower  4.629668
54  bb_50_1.5_lower  4.607333
52    bb_50_1_lower  4.580221


In [12]:
"""
ML Pipeline
with Verification
"""
# Two-step pipeline: keep the 10 highest-scoring features, then fit a gradient-boosted classifier.
pipe = make_pipeline(SelectKBest(score_func=f_regression, k=10), GradientBoostingClassifier())
# Time-ordered cross-validation with 10 splits.
tscv = TimeSeriesSplit(n_splits=10)
# 8 depths x 4 estimator counts x 3 learning rates = 96 candidates, each fitted once per split.
parameters = {
    'gradientboostingclassifier__max_depth': range (2, 10, 1),
    'gradientboostingclassifier__n_estimators': range(60, 220, 40),
    'gradientboostingclassifier__learning_rate': [0.1, 0.01, 0.05]
}
# Candidates are ranked by precision; n_jobs=-1 uses every available core.
clf = GridSearchCV(pipe, parameters, n_jobs=-1, cv=tscv, scoring = 'precision')
# clf = GridSearchCV(pipe, parameters, n_jobs=-1, cv=tscv, scoring = 'roc_auc')
clf.fit(X_train,y_train)
# Last expression of the cell, so the winning parameter set is displayed.
clf.best_params_

# load
# with open('model/model.pkl', 'rb') as f:
#     clf = pickle.load(f)
    


{'gradientboostingclassifier__learning_rate': 0.01,
 'gradientboostingclassifier__max_depth': 2,
 'gradientboostingclassifier__n_estimators': 60}

In [None]:
# save
# with open('model/model.pkl','wb') as f:
#     pickle.dump(clf,f)

In [13]:
# Only the last expression of a cell is displayed, so this line has no visible effect.
clf.cv_results_
# Mean cross-validated score (precision, per the `scoring` argument) of the best candidate.
clf.best_score_

0.5313597724097894

In [14]:
# In-sample precision and accuracy.
y_pred = clf.predict(X_train)
print(precision_score(y_train, y_pred), accuracy_score(y_train, y_pred))

# Out-of-sample precision and accuracy. `y_pred` is left holding the test-set
# predictions, which the paper-trading cell further down iterates over.
y_pred = clf.predict(X_test)
print(precision_score(y_test, y_pred), accuracy_score(y_test, y_pred))

0.8090909090909091 0.5159103486441616
0.6666666666666666 0.5211706102117061


In [15]:
"""
Tutorial of PaperTrader Class
"""
# PT = PaperTrader(500)
# key = PT.buy('MSFT', 100, 3)
# print(PT.current_record())
# print(PT.current_free_cash())
# PT.sell(key, 102, 3)
# print(pd.DataFrame(PT.current_record()))
# print(PT.current_free_cash())

'\nTutorial of PaperTrader Class\n'

In [16]:
# Replay the test-set predictions through the paper trader: on every buy signal,
# buy one share at that bar's close and sell it at the next bar's close.
PT_test = PaperTrader(5000)
X_test_temp = X_test['close'].reset_index()
closes = X_test_temp['close']
for i, pred in enumerate(y_pred):
    # A signal on the final bar has no following close to exit at, so it is not
    # traded. This explicit bound replaces a bare `except: pass`, which left that
    # last position bought but never sold and would also hide any other error.
    if pred and i + 1 < len(closes):
        key = PT_test.buy(stock_ticker, closes[i], 1)
        PT_test.sell(key, closes[i + 1], 1)
print(pd.DataFrame(PT_test.current_record()))
print(PT_test.current_free_cash())

   key ticker  buy_price  buy_amount  buy_total_amount             buy_time  \
0    0    VOO     430.91           1            430.91  2022-01-17 15:53:06   
1    1    VOO     431.24           1            431.24  2022-01-17 15:53:06   
2    2    VOO     430.87           1            430.87  2022-01-17 15:53:06   

   sell_price  sell_amount  sell_total_amount            sell_time   open  \
0     431.155            1            431.155  2022-01-17 15:53:06  False   
1     431.205            1            431.205  2022-01-17 15:53:06  False   
2     430.950            1            430.950  2022-01-17 15:53:06  False   

   profit/loss  
0        0.245  
1       -0.035  
2        0.080  
5000.29


In [17]:
# Fresh pull of the latest candles, pushed through the prediction-mode path of
# preprocessing() with the differencing order and weight count found on the training data.
data_test = pd.DataFrame(c.get_price_history_every_minute(stock_ticker).json()['candles'])
pred_data, _, _ = preprocessing(data_test, price_offset = 1.000001, prediction = True, best_d_value = best_d_value, len_of_original_weights = len(weights))

In [18]:
def second_check(start):
    """
    Report whether the wall clock has moved on to a different minute.

    `start` is a minute-of-hour value captured earlier.  Returns True when the
    current minute differs from it and False while it is still the same, so the
    caller only pulls data once a new minute has begun.
    """
    current_minute = datetime.now().minute
    return not (start == current_minute)


def main():
    """
    Poll forever and print a fresh prediction each time the clock minute changes.

    Each pass records the current minute, sleeps five seconds and, if the minute
    has changed in the meantime, pulls the latest candles, rebuilds the
    prediction-mode features and prints the classifier output together with the
    timestamp of the newest candle.  Never returns.
    """
    while True:
        minute_at_start = datetime.now().minute
        sleep(5)
        if not second_check(minute_at_start):
            continue

        candles = c.get_price_history_every_minute(stock_ticker).json()['candles']
        latest = pd.DataFrame(candles)
        # Read the timestamps from the raw candles, before preprocessing() trims them.
        candle_times = time_processing(latest)
        features, _, _ = preprocessing(
            latest,
            price_offset=1.000001,
            prediction=True,
            best_d_value=best_d_value,
            len_of_original_weights=len(weights),
        )
        print(clf.predict(features), candle_times[-1])

def AI_trade(buy_signal: bool, stock: str = 'VOO', buy_count: int = 1, hold_seconds: float = 60):
    """
    Buy and sell according to the AI buy signal.

    When `buy_signal` is truthy, places a market buy for `buy_count` shares of
    `stock` on the account `account_id`, waits `hold_seconds`, then places a
    market sell for the same quantity.  Does nothing when the signal is falsy.

    Parameters
    ----------
    buy_signal : bool
        Model output; only a truthy value triggers the round trip.
    stock : str
        Ticker to trade.
    buy_count : int
        Number of shares to buy and then sell.
    hold_seconds : float
        Seconds to block between the buy and the sell.

    Returns
    -------
    None
    """
    if not buy_signal:
        return

    # Imported here because the notebook only binds `auth`, `client` and `orders`
    # from the tda package; the bare name `tda` is never defined, so
    # `tda.orders...` raised NameError.
    from tda.orders.equities import equity_buy_market, equity_sell_market

    # NOTE(review): the buy response is not inspected, so the sell is still sent
    # if the buy was rejected — confirm whether that is acceptable for live use.
    c.place_order(account_id, equity_buy_market(stock, buy_count))
    sleep(hold_seconds)
    c.place_order(account_id, equity_sell_market(stock, buy_count))
        
        
