In [80]:
import numpy as np
import pandas as pd

import joblib
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

from imblearn.over_sampling import RandomOverSampler

from collections import Counter
import datetime as dt
import schedule
import time

from dotenv import find_dotenv, get_key

import alpaca_trade_api as tradeapi
# Read both Alpaca credentials from the .env file located by find_dotenv(),
# then build a REST client pointed at the paper-trading endpoint.
alpaca_api_key, alpaca_secret_key = (
    get_key(find_dotenv(), key_name)
    for key_name in ('ALPACA_API_KEY', 'ALPACA_SECRET_KEY')
)
ALPACA_API_BASE_URL = 'https://paper-api.alpaca.markets'
api = tradeapi.REST(
    key_id=alpaca_api_key,
    secret_key=alpaca_secret_key,
    base_url=ALPACA_API_BASE_URL,
    api_version='v2',
)

import warnings
# Suppress every warning for the rest of the session. NOTE(review): this also
# hides deprecation notices from pandas / sklearn / the Alpaca client, so
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [12]:
# Universe of tickers to download and model.
ticker_list = [
    'FB',     # Facebook
    'AMZN',   # Amazon
    'AAPL',   # Apple
    'NFLX',   # Netflix
    'GOOGL',  # Google
    'MSFT',   # Microsoft
    'TSLA',   # Tesla
]

# Query window: 09:30 to 16:00 New York wall-clock time on the given day,
# converted to UTC and rendered as ISO-8601 with a trailing 'Z'.
start = '2021-01-05'
start = (
    pd.Timestamp(f'{start} 09:30:00', tz='America/New_York')
    .tz_convert('UTC')
    .strftime('%Y-%m-%dT%H:%M:%SZ')
)
display(start)

end = '2021-01-05'
end = (
    pd.Timestamp(f'{end} 16:00:00', tz='America/New_York')
    .tz_convert('UTC')
    .strftime('%Y-%m-%dT%H:%M:%SZ')
)
display(end)

# Bar size requested from the data API.
timeframe = '1Min'

'2021-01-05T14:30:00Z'

'2021-01-05T21:00:00Z'

In [7]:
# Download bars for every ticker over the [start, end] window (at most 1000
# bars per request) and expose them as a DataFrame whose columns are
# (ticker, field) pairs indexed by bar time.
barset = api.get_barset(ticker_list, timeframe, start=start, end=end, limit=1000)
prices = barset.df
prices.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AMZN,AMZN,AMZN,AMZN,AMZN,...,NFLX,NFLX,NFLX,NFLX,NFLX,TSLA,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-01-05 09:30:00-05:00,128.96,129.485,128.45,129.485,51887,3166.8,3173.53,3166.8,3172.98,1963.0,...,521.98,521.98,520.77,521.03,1355.0,723.66,726.28,721.35,725.23,18284.0
2021-01-05 09:31:00-05:00,129.48,130.17,129.3,130.06,44188,3173.59,3182.67,3173.58,3177.81,1266.0,...,520.92,521.755,520.92,521.365,1112.0,726.6,726.999,722.42,723.0,7760.0
2021-01-05 09:32:00-05:00,130.17,130.32,129.93,130.02,12852,3175.0,3175.47,3174.91,3175.47,778.0,...,522.355,522.355,520.77,520.77,1347.0,723.1,723.1,719.78,720.57,9902.0
2021-01-05 09:33:00-05:00,130.09,130.14,129.78,130.12,14192,3181.52,3181.52,3177.87,3179.36,660.0,...,520.84,520.84,520.0,520.0,1582.0,720.53,722.71,719.22,719.71,7086.0
2021-01-05 09:34:00-05:00,130.15,130.58,130.15,130.51,12002,3183.66,3189.98,3183.66,3184.015,731.0,...,521.44,522.26,521.37,522.24,1039.0,719.97,724.22,719.97,724.22,8581.0


In [8]:
# One 'close' column per ticker, in ticker_list order. Building the frame from
# ticker_list (instead of one hand-written assignment per symbol) keeps the
# columns in sync with the universe that was actually downloaded.
df_closing_prices = pd.DataFrame(
    {ticker: prices[ticker]['close'] for ticker in ticker_list}
)
# Fill gaps (bars where a ticker has no close) with that ticker's previous close.
df_closing_prices = df_closing_prices.ffill()
df_closing_prices.head()

Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOGL,MSFT,TSLA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-05 09:30:00-05:00,269.0,3172.98,129.485,521.03,1724.17,217.65,725.23
2021-01-05 09:31:00-05:00,269.17,3177.81,130.06,521.365,1724.05,217.63,723.0
2021-01-05 09:32:00-05:00,269.72,3175.47,130.02,520.77,1721.61,217.77,720.57
2021-01-05 09:33:00-05:00,268.8,3179.36,130.12,520.0,1721.61,217.72,719.71
2021-01-05 09:34:00-05:00,269.58,3184.015,130.51,522.24,1720.3,217.31,724.22


In [26]:
# Target variable: the return realised over the NEXT `forecast` bars.
# pct_change() looks backwards, so shifting by -forecast aligns each row with
# the return that follows it. unstack() then flattens the wide frame into one
# row per (ticker, time); reset_index() names the ticker column 'level_0'.
forecast = 1
returns = (
    df_closing_prices
    .pct_change(forecast)
    .shift(-forecast)
    .unstack()
    .to_frame(name=f'F_{forecast}_m_returns')
    .reset_index()
)
returns

Unnamed: 0,level_0,time,F_1_m_returns
0,FB,2021-01-05 09:30:00-05:00,0.000632
1,FB,2021-01-05 09:31:00-05:00,0.002043
2,FB,2021-01-05 09:32:00-05:00,-0.003411
3,FB,2021-01-05 09:33:00-05:00,0.002902
4,FB,2021-01-05 09:34:00-05:00,0.001335
...,...,...,...
2732,TSLA,2021-01-05 15:56:00-05:00,0.002033
2733,TSLA,2021-01-05 15:57:00-05:00,0.000463
2734,TSLA,2021-01-05 15:58:00-05:00,0.000680
2735,TSLA,2021-01-05 15:59:00-05:00,0.000000


In [27]:
# Feature columns: trailing returns over 1, 5 and 10 bars, attached to the
# target table by (ticker, time). The left join keeps every target row.
momenta = [1, 5, 10]
for m in momenta:
    returns_temp = (
        df_closing_prices
        .pct_change(m)
        .unstack()
        .to_frame(name=f'{m}_m_returns')
        .reset_index()
    )
    returns = returns.merge(
        returns_temp,
        on=['level_0', 'time'],
        how='left',
        suffixes=('_original', 'right'),
    )

In [29]:
# Discard rows with any missing target or feature value, then index the table
# by (ticker, time).
returns = returns.dropna().set_index(['level_0', 'time'])
returns

Unnamed: 0_level_0,Unnamed: 1_level_0,F_1_m_returns,1_m_returns,5_m_returns,10_m_returns
level_0,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FB,2021-01-05 09:40:00-05:00,0.000814,0.000074,0.001260,0.004758
FB,2021-01-05 09:41:00-05:00,0.000887,0.000814,0.001889,0.004941
FB,2021-01-05 09:42:00-05:00,0.000628,0.000887,0.001999,0.003782
FB,2021-01-05 09:43:00-05:00,0.000480,0.000628,0.003408,0.007850
FB,2021-01-05 09:44:00-05:00,-0.001291,0.000480,0.002886,0.005416
...,...,...,...,...,...
TSLA,2021-01-05 15:55:00-05:00,-0.000259,0.000437,0.001140,0.000164
TSLA,2021-01-05 15:56:00-05:00,0.002033,-0.000259,0.000382,0.000806
TSLA,2021-01-05 15:57:00-05:00,0.000463,0.002033,0.003463,0.003868
TSLA,2021-01-05 15:58:00-05:00,0.000680,0.000463,0.004223,0.004140


In [None]:
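# Uncomment to persist the feature table that the modelling cell reads back
# from 'returns.csv'.
# NOTE(review): the index levels here are named 'level_0' and 'time', while
# the loader asks for index_col=['level_0', 'level_1'] - confirm the column
# names agree before regenerating the file.
#returns.to_csv('returns.csv')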

---

In [62]:
# Load the saved feature table.
# NOTE(review): the index column names must match what is stored in the CSV;
# the in-memory frame built earlier in this notebook uses 'time', not
# 'level_1' - confirm against the file.
returns = pd.read_csv('returns.csv', index_col=['level_0', 'level_1'])

# Features are every column except the forward return; the label is 1 when the
# forward return is strictly positive and 0 otherwise (flat bars count as 0).
X = returns.drop(columns=['F_1_m_returns'])
y = np.where(returns['F_1_m_returns'] > 0, 1, 0)

# shuffle=False keeps file order: the last 30% of ROWS become the test set.
# NOTE(review): if the file is ordered ticker-by-ticker (presumably, given how
# the table is built), the test set is the last tickers rather than the latest
# timestamps - confirm this is the intended split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)
display(Counter(y_train))

# Balance the training classes by duplicating minority-class rows; the test
# set is left untouched.
ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
display(Counter(y_resampled))

# Fit and score each candidate on identical data. Reports are printed in this
# order; after the loop `model` / `y_pred` refer to the last candidate.
candidate_models = {
    'logistic_regression': LogisticRegression(),
    'random_forest': RandomForestClassifier(random_state=0),
    'gradient_boosting': GradientBoostingClassifier(random_state=0),
    'ada_boost': AdaBoostClassifier(random_state=0),
    'xgboost': XGBClassifier(),
}
fitted_models = {}
for model_name, model in candidate_models.items():
    model = model.fit(X_resampled, y_resampled)
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred, digits=4))
    fitted_models[model_name] = model

# Keep the fitted logistic regression under its own name so the dump line
# below refers to a variable that exists. trade() loads 'log_model.pkl'.
log_model = fitted_models['logistic_regression']
#joblib.dump(log_model, 'log_model.pkl')

Counter({1: 668, 0: 1194})

Counter({1: 1194, 0: 1194})

              precision    recall  f1-score   support

           0     0.5365    0.5259    0.5312       405
           1     0.5212    0.5318    0.5264       393

    accuracy                         0.5288       798
   macro avg     0.5289    0.5289    0.5288       798
weighted avg     0.5290    0.5288    0.5288       798

              precision    recall  f1-score   support

           0     0.5041    0.6049    0.5499       405
           1     0.4872    0.3868    0.4312       393

    accuracy                         0.4975       798
   macro avg     0.4956    0.4959    0.4906       798
weighted avg     0.4958    0.4975    0.4915       798

              precision    recall  f1-score   support

           0     0.4976    0.5136    0.5055       405
           1     0.4816    0.4656    0.4735       393

    accuracy                         0.4900       798
   macro avg     0.4896    0.4896    0.4895       798
weighted avg     0.4897    0.4900    0.4897       798

              preci

In [83]:
def trade():
    """Run one trading cycle: score the latest bars and buy the predicted risers.

    Steps:
      1. Fetch recent one-minute bars and keep the last ``max(momenta) + 1``
         rows, which yields at most one complete feature row per ticker.
      2. Build trailing 1/5/10-bar returns per ticker.
      3. Load 'log_model.pkl' and predict; tickers predicted as 1 are bought.
      4. Split account equity evenly across those tickers, cancel open orders,
         close all positions, and submit market buy orders.

    Uses the module-level ``api`` client. Returns None.
    """
    ticker_list = ['FB', 'AMZN', 'AAPL', 'NFLX', 'GOOGL', 'MSFT', 'TSLA']
    timeframe = '1Min'
    momenta = [1, 5, 10]

    # The longest look-back needs max(momenta) prior bars plus the current one.
    prices = api.get_barset(ticker_list, timeframe).df.iloc[-(max(momenta) + 1):]
    prices = prices.ffill()

    df_closing_prices = pd.DataFrame(
        {ticker: prices[ticker]['close'] for ticker in ticker_list}
    )

    # One column per look-back, indexed by (ticker, time); rows with any
    # missing look-back are dropped.
    returns = pd.concat(
        {f'{m}_m_returns': df_closing_prices.pct_change(m).unstack() for m in momenta},
        axis=1,
    ).dropna()

    X = returns
    if X.empty:
        # Not enough bars to build a single feature row; do nothing this cycle
        # instead of letting predict() raise on an empty input.
        print('no complete feature rows; skipping this cycle')
        return

    # joblib.load unpickles the file - only load a model file you produced yourself.
    model = joblib.load('log_model.pkl')
    y_pred = pd.Series(model.predict(X), index=X.index, name='buy')
    buy_tickers = y_pred[y_pred == 1].index.get_level_values(0).unique()

    # Split capital evenly among the tickers to buy.
    buy_dict = {}
    if len(buy_tickers) > 0:
        total_capital = float(api.get_account().equity)
        capital_per_stock = total_capital / len(buy_tickers)
        for ticker in buy_tickers:
            last_close = float(prices[ticker]['close'].iloc[-1])
            if not last_close > 0:
                # Covers NaN as well as zero/negative prices.
                print('skipping ' + ticker + ': no usable last close')
                continue
            # Divide by the untruncated price so the order cannot cost more
            # than this ticker's share of capital.
            buy_dict[ticker] = int(capital_per_stock / last_close)

    # cancel pending orders and close positions
    api.cancel_all_orders()
    api.close_all_positions()

    # submit orders
    for ticker, numShares in buy_dict.items():
        if numShares > 0:
            print('buying ' + ticker + ' numShare ' + str(numShares))
            api.submit_order(ticker, side='buy', qty=numShares, type='market', time_in_force='gtc')
        else:
            print('skipping ' + ticker + ': capital per stock is below one share')

In [84]:
# Run trade() five seconds past every minute for as long as the market is open.
schedule.clear()
schedule.every().minute.at(':05').do(trade)
clock = api.get_clock().is_open
while clock:
    schedule.run_pending()
    # Sleep between polls so the loop does not spin a CPU core, then re-read
    # the market clock so the loop actually ends when the market closes.
    time.sleep(1)
    clock = api.get_clock().is_open

---