In [1]:
# Import useful packages
import os
import time
import pandas as pd
# Suppressing some warnings in pandas
pd.options.mode.chained_assignment = None
import numpy as np
import glob
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime, date, time, timedelta
from ta import add_all_ta_features

# Importing our self-created functions
from feature_creation import *
from portfolio import *

# Dash modules
import dash
import dash_table
import dash_core_components as dcc
import dash_html_components as html
from jupyter_dash import JupyterDash
import plotly.express as px
import plotly.graph_objects as go

# Set up jupyter proxy
# JupyterDash.infer_jupyter_proxy_config()

%matplotlib inline

In [2]:
# # **Only used when re-creating feature_df from scratch (time consuming)**
# # Creating stock_df from feature file
# stock_df = daily_features()
# recommendations_df = pd.read_csv('assets/ticker_recommendations.csv')
# stock_df.head()

In [3]:
# # **Only used when re-creating feature_df from scratch (time consuming)**
# # Creating feature_df from scratch, but computing technical analysis features for all tickers

# # Create list of tickers for creating technical features and later usage
# tickers = list(stock_df.ticker.unique())

# # Running technical analysis feature creation for all tickers individually

# # count = 0

# for ticker in tickers:
    
#     temp_df = stock_df[stock_df['ticker']==ticker]
    
#     temp_df = add_all_ta_features(
#     temp_df, open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=True)
    
#     if count == 0:
#         feature_df = temp_df
#         count += 1
        
#     else:
#         feature_df = pd.concat([feature_df,temp_df])
#         count += 1
#     print(f'Finished {ticker}. Done with {count} of 504 tickers')
    
# # save feature df to csv for future usage. Feature creation takes hours on entire dataset
# feature_df.to_csv('assets/models/tyler_rf_daily_update/ta_feature_df.csv')

In [6]:
# Or just load if no need to re-create features
feature_df = pd.read_csv('assets/models/tyler_rf_daily_update/ta_feature_df.csv',index_col=0)

# Ensure index is datetime
feature_df.index = pd.to_datetime(feature_df.index)

feature_df.head()

Unnamed: 0_level_0,Adj Close,Close,Close_adj,Dividends,High,High_adj,Low,Low_adj,Open,Open_adj,...,momentum_wr,momentum_ao,momentum_kama,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,others_dr,others_dlr,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990-02-16,0.056967,0.056967,0.077257,0.0,0.058887,0.079861,0.054407,0.073785,0.0,0.0,...,-42.857143,0.0,0.056967,0.0,0.0,0.0,0.0,-99.670356,0.0,0.0
1990-02-20,0.058887,0.058887,0.079861,0.0,0.058887,0.079861,0.055047,0.074653,0.0,0.0,...,-0.000804,0.0,0.057802,0.0,-7.132343,-1.426469,-5.705874,3.370505,3.314949,3.370505
1990-02-21,0.057607,0.057607,0.078125,0.0,0.058247,0.078993,0.055687,0.075521,0.0,0.0,...,-28.571863,0.0,0.057718,0.0,-14.352665,-4.011708,-10.340957,-2.17375,-2.197724,1.123489
1990-02-22,0.058247,0.058247,0.078993,0.0,0.060167,0.081597,0.058247,0.078993,0.0,0.0,...,-33.333463,0.0,0.057944,0.0,-21.164525,-7.442271,-13.722254,1.111033,1.104906,2.247004
1990-02-23,0.057927,0.057927,0.078559,0.0,0.058887,0.079861,0.057607,0.078125,0.0,0.0,...,-38.8891,0.0,0.057937,0.0,-27.325018,-11.418821,-15.906197,-0.549422,-0.550937,1.685236


In [8]:
# Simple visualization of a ticker price over time to ensure data looks accurate
fig = go.Figure()

ticker = 'AAPL'
df = feature_df[(feature_df['ticker']==ticker)&(feature_df.index>='2018-01-01')]

fig.add_trace(go.Scatter(x=df.index,
                         y=df['Close'],
                        line={"color": "#228B22"},
                        mode="lines",
                        name='Closing Price'))

fig.add_trace(go.Scatter(x=df.index,
                         y=df['trend_sma_fast'],
                        line={"color": "red","dash":"dash","width":1},
                        mode="lines",
                        name='Closing Price'))

fig.add_trace(go.Scatter(x=df.index,
                         y=df['trend_sma_slow'],
                        line={"color": "black","dash":"dash","width":1},
                        mode="lines",
                        name='Closing Price'))


fig.update_layout(title_text=f'{ticker} Closing Price',title_x=0.5,
                         template="ggplot2",font=dict(size=10,color='black'),xaxis_showgrid=False,
                         paper_bgcolor='rgba(0,0,0,0)',
                         yaxis_title="Closing Price",margin={"r": 20, "t": 35, "l": 20, "b": 10},
                         showlegend=False)

fig.show()

## Part 1: Feature Selection
#### In this section, we train a lightly configured Random Forest Regressor to get feature importance of all features. Using this set of features, we order based on importance and only select features over a certain threshold of importance

In [9]:
# Creating sample df for AAPL only, as we train models for all tickers eventually
sample_df = feature_df[(feature_df['ticker']=='AAPL')&(feature_df.index>='2018-01-01')&\
                       (feature_df.index<='2020-12-31')]

# Creating target variables to look at performance in forecasting at different time horizons
sample_df['target_7'] = sample_df['Close_adj'].shift(-7)
sample_df['target_30'] = sample_df['Close_adj'].shift(-30)
sample_df['target_60'] = sample_df['Close_adj'].shift(-60)
sample_df['target_120'] = sample_df['Close_adj'].shift(-120)

In [10]:
# Test Train split
split_perc = .8
train_df = sample_df.iloc[:int(len(sample_df)*split_perc)]
test_df = sample_df.iloc[int(len(sample_df)*split_perc):]

# Ensuring the test/train split worked correctly
fig = go.Figure()

fig.add_trace(go.Scatter(x=train_df.index,
                         y=train_df['Close_adj'],
                        line={"color": "#228B22"},
                        mode="lines",
                        name='Train'))

fig.add_trace(go.Scatter(x=test_df.index,
                         y=test_df['Close_adj'],
                        line={"color": "red"},
                        mode="lines",
                        name='Test'))

In [11]:
# Training our initial RF Reg model

# Get actual feature columns from train_df
features = list(train_df.columns)[15:len(train_df.columns)-4]

# Using a Random Forest Regressor to test time horizon predictions
from sklearn.ensemble import RandomForestRegressor

regr = RandomForestRegressor(max_depth=5, random_state=0)

X = train_df[features]
y = train_df['target_7']

regr.fit(X, y)

RandomForestRegressor(max_depth=5, random_state=0)

In [12]:
# Plotting feature importance. Vast majority of features seem to be of no use to the model, so will drop
fig = go.Figure()

feats = [x for _, x in sorted(zip(regr.feature_importances_,features),reverse=True)]
importance = sorted(regr.feature_importances_,reverse=True)

fig.add_trace(go.Bar(x=feats, y=importance,name="Feature Importance"))

fig.add_trace(go.Scatter(x=feats,
                         y=np.cumsum(sorted(regr.feature_importances_,reverse=True)),
                         line={"color":"black"},
                         mode="lines",
                         name="Cumulative Importance"
                        ))
                         

fig.show()

In [13]:
# Getting only features that are important up to a threshold
thresh = .96
thresh_len = len([x for x in np.cumsum(sorted(regr.feature_importances_,reverse=True)) if x <= thresh])

# Using this loop to ensure features we keep are adding substantial value
[x for x in np.cumsum(sorted(regr.feature_importances_,reverse=True)) if x <= thresh]

[0.15628465547392478,
 0.24228234717072014,
 0.32039484180548344,
 0.38866208827593385,
 0.4506052670468377,
 0.5055831210746419,
 0.5602295647288507,
 0.6143937926918397,
 0.6685112223888728,
 0.7078305760461447,
 0.7468338461672918,
 0.7856395471821814,
 0.8175426731361459,
 0.8491085682666626,
 0.8804056224797144,
 0.9039043814911785,
 0.9200262138942854,
 0.9357654740519502,
 0.9475023790761609,
 0.9563901586841211]

## Part 2: Exploring Target Variables
#### We are going to explore model prediction capabilities by training 4 models tasked with predicting future price over some time horizon (7, 30, 60, 120 days), which will tell us if we should be buying or selling to take advantage of price changes

In [14]:
# Including all features under importance threshold
updated_feats = feats[:thresh_len+1]

# Re-training model with smaller subset of features
regr = RandomForestRegressor(max_depth=5, random_state=0)

X = train_df[updated_feats]
y = train_df['target_7']

regr.fit(X, y)

RandomForestRegressor(max_depth=5, random_state=0)

In [15]:
# Feature importances still seem appropriate given all are somewhat close to 1%
# R^2 is very high. This model is most likely highly overfit
regr.feature_importances_, regr.score(X,y)

(array([0.16948429, 0.06490748, 0.05832252, 0.07059909, 0.05465841,
        0.03135002, 0.07292808, 0.02404239, 0.03959279, 0.07317919,
        0.04747828, 0.02385721, 0.04247838, 0.07079181, 0.01585867,
        0.03156786, 0.03222813, 0.01629333, 0.02824216, 0.01741371,
        0.01472619]),
 0.9889727141657079)

In [16]:
# Creating our train data points from predicting our target
train_predictions = regr.predict(train_df[updated_feats])

# Creating our test data points from predicting our target
test_predictions = regr.predict(test_df[updated_feats])

fig = go.Figure()

fig.add_trace(go.Scatter(x=train_df.index,
                         y=train_df['Close_adj'],
                        line={"color": "#228B22"},
                        mode="lines",
                        name='Train'))

fig.add_trace(go.Scatter(x=train_df.index+timedelta(days=7),
                         y=train_predictions,
                        line={"color": "gray","dash":"dash"},
                        mode="lines",
                        name='Train Predictions'))

fig.add_trace(go.Scatter(x=test_df.index,
                         y=test_df['Close_adj'],
                        line={"color": "#228B22","dash":"dash"},
                        mode="lines",
                        name='Test Actuals'))

fig.add_trace(go.Scatter(x=test_df.index+timedelta(days=7),
                         y=test_predictions,
                        line={"color": "red"},
                        mode="lines",
                        name='Test Predictions (7 Day)'))

#### Observations:
1. Looking at prediction performance. So far, our model does a horrible job at predicting the future.
2. Will look into some thresholding on price projections/ROI to making buying/selling decisions

#### Adding additional models with longer targets

In [17]:
# Training a model on 30_Day
regr = RandomForestRegressor(max_depth=5, random_state=0)

X = train_df[updated_feats]
y = train_df['target_30']

regr.fit(X, y)

print(regr.feature_importances_, regr.score(X,y))

test_predictions = regr.predict(test_df[updated_feats])

fig.add_trace(go.Scatter(x=test_df.index+timedelta(days=30),
                         y=test_predictions,
                        line={"color": "red","dash":"dash"},
                        mode="lines",
                        name='Test Predictions (30 Day)'))

[2.67686123e-02 1.07320538e-02 2.50985942e-02 5.92241486e-03
 5.28720906e-03 1.50084688e-03 4.03824563e-02 5.16487258e-04
 7.07360201e-01 1.22909395e-02 1.70066724e-02 1.90410000e-03
 1.64970733e-03 5.64535987e-04 8.71526850e-03 1.03351012e-03
 8.03517650e-03 6.28259009e-02 2.75994538e-02 6.37683708e-03
 2.84290225e-02] 0.9662562134751689


#### Observations:
1. TBD

In [18]:
# Training a model on 60_Day
regr = RandomForestRegressor(max_depth=5, random_state=0)

X = train_df[updated_feats]
y = train_df['target_60']

regr.fit(X, y)

print(regr.feature_importances_, regr.score(X,y))

test_predictions = regr.predict(test_df[updated_feats])

fig.add_trace(go.Scatter(x=test_df.index+timedelta(days=60),
                         y=test_predictions,
                        line={"color": "blue"},
                        mode="lines",
                        name='Test Predictions (60 Day)'))

[0.01674967 0.00272799 0.01262846 0.00652843 0.00209506 0.00161566
 0.01134567 0.00251443 0.6488142  0.00869551 0.01545593 0.00425438
 0.0027263  0.00126796 0.00141193 0.00102709 0.02909942 0.06350829
 0.02343484 0.04327379 0.10082499] 0.9571111784847924


#### Observations:
1. TBD

In [19]:
# Training a model on 120_Day
regr = RandomForestRegressor(max_depth=5, random_state=0)

X = train_df[updated_feats]
y = train_df['target_120']

regr.fit(X, y)

print(regr.feature_importances_, regr.score(X,y))

test_predictions = regr.predict(test_df[updated_feats])

fig.add_trace(go.Scatter(x=test_df.index+timedelta(days=120),
                         y=test_predictions,
                        line={"color": "blue","dash":"dash"},
                        mode="lines",
                        name='Test Predictions (120 Day)'))

[0.00591957 0.09580901 0.26896215 0.00210429 0.00257402 0.00366349
 0.00363579 0.00314538 0.01294288 0.08515116 0.00631723 0.00195388
 0.00998754 0.00191807 0.00346449 0.00220265 0.36661463 0.03657685
 0.00323366 0.04430448 0.03951876] 0.9752432462140244


#### Observations:
1. TBD

#### Observations:
1. Every model does a fairly good job of predicting the first data point forecasted in the future, but performance drops off significantly from there...potentially an implementation issue
2. What objective function should I actually be using?
3. Should I weight predictions and decisions based on model backtesting performance?

## Part 3: Using past knowledge to re-train our model
#### Given we see every first point we predict is much more aligned to actual test data, we will explore an approach that will allow us to re-train the model every day once we have additional info

In [20]:
new_train_df = train_df
test_preds = []
test_dates = []

# Need to remove last 7 observations due to missing price for 7 days in the future
for i in range(len(test_df)-7):

    # Creating next prediction point
    test_point = test_df[updated_feats].iloc[[i]]

    X = new_train_df[updated_feats]
    y = new_train_df['target_7']

    regr = RandomForestRegressor(max_depth=5, n_estimators=100, random_state=0)

    regr.fit(X, y)

    test_prediction = regr.predict(test_point)[0]
    test_date = test_point.index.values[0]

    test_preds.append(test_prediction)
    test_dates.append(test_date)
    
    # Adding next observation to our training data to simulate re-training a model daily
    new_train_df = pd.concat([new_train_df,test_df.iloc[[i]]])
    
# Adding the final predictions for obersavtions without the target variable
for i in range(len(test_df)-7,len(test_df)):
    
    test_point = test_df[updated_feats].iloc[[i]]
    
    test_prediction = regr.predict(test_point)[0]
    test_date = test_point.index.values[0]
    
    test_preds.append(test_prediction)
    test_dates.append(test_date)


In [21]:
# Creating DF with new preds to allow for easy plotting
new_test_df = pd.DataFrame(data=test_preds,index=test_dates,columns=['7_Day'])

In [22]:
# Plotting our new re-fitted predictions with test data
# Fit is much more appropriate...why??
fig = go.Figure()

fig.add_trace(go.Scatter(x=test_df.index,
                         y=test_df['Close'],
                        line={"color": "#228B22","dash":"dash"},
                        mode="lines",
                        name='Test Actuals'))

fig.add_trace(go.Scatter(x=new_test_df.index+timedelta(days=7),
                         y=new_test_df['7_Day'],
                        line={"color": "red"},
                        mode="lines",
                        name='Test Predictions (7 Day)'))

## Part 4: Picking a subset of tickers and creating models for each
#### In this section, we will make assumptions about tickers and their performance (based on research/tribal knowledge) and build models for each to allow us to start to create a diversified portfolio

In [27]:
# Could pick tickers with lowest industry P/E ratios, but for now only picking popular stocks
# getting all sectors to get largest tickers by volumes
sectors = list(feature_df.sector.unique())

# Creating function to get largest tickers for a sector
def largest_tickers_by_vol(sector):
    return list(feature_df[feature_df['sector']==sector].groupby('ticker').mean('Volume')['Volume'].nlargest(5).index.values)

tickers = []

for sector in sectors:
    tickers.extend(largest_tickers_by_vol(sector))

tickers[:10]

['AAPL', 'MSFT', 'CSCO', 'INTC', 'ORCL', 'GE', 'CSX', 'AAL', 'DAL', 'UAL']

In [28]:
# Setting these manually so I dont have to re-run the rest of the notebook
updated_feats = ['others_cr','trend_ema_slow','volatility_dch','volatility_kcl','volatility_bbm',
 'volatility_kch','trend_sma_slow','trend_sma_fast','volume_vwap','volatility_bbh','SMA_15',
 'volatility_dcm','upperband','trend_ema_fast','trend_ichimoku_a','volatility_kcc','momentum_kama',
 'volatility_bbl','trend_ichimoku_conv','volatility_dcl','trend_kst_sig']

# Should this have the current price as a feature?

In [29]:
# Define function to fit a model that predicts price performance over some time horizon
def fit_model(ticker,train_start_dt,train_end_dt,target=7):

    # Creating sample df for ticker only
    sample_df = feature_df[(feature_df['ticker']==ticker)&(feature_df.index>=train_start_dt)&\
                           (feature_df.index<=train_end_dt)].fillna(method="ffill")

    # Backfilling if forward filling didn't work
    sample_df = feature_df[(feature_df['ticker']==ticker)&(feature_df.index>=train_start_dt)&\
                           (feature_df.index<=train_end_dt)].fillna(method="bfill")

    # Creating target variable to predict prices X days in future
    sample_df[f'target_{target}'] = sample_df['Close_adj'].shift(-1*target)

    # Dropping 7 most recent dates given there is no prediction
    train_df = sample_df.dropna()

    # Fitting my model on inital train_df
    X = train_df[updated_feats]
    y = train_df[f'target_{target}']

    regr = RandomForestRegressor(max_depth=5, n_estimators=100, random_state=0)
    regr.fit(X, y)

    # Getting trading days calendar to add prediction to N trading days in future
    nyse = mcal.get_calendar('NYSE')
    train_end_dt_for_cal = datetime.strptime(train_end_dt,"%Y-%m-%d")
    valid_days = list(nyse.valid_days(start_date=train_end_dt_for_cal, end_date=train_end_dt_for_cal+timedelta(days=30)))

    test_date = valid_days[target].strftime('%Y-%m-%d')
    test_prediction = regr.predict(sample_df[updated_feats].iloc[[-1]])[0]

    return test_date, test_prediction

In [30]:
# Testing out our function with one model creation for AAPL
fit_model('AAPL','2016-01-01','2020-05-27')

('2020-06-05', 77.78914867591017)

In [31]:
# Initializing portolio. start_dt creates a new record on account opening with cash deposit
# Do we need to set a control so you can't trade before start_dt?
start_dt = test_df.index.min().strftime('%Y-%m-%d')
port = portfolio(start_date=start_dt,value=10000)

In [32]:
# Creating model of first iteration for all tickers selected above
train_start_dt = '2016-01-01'
train_end_dt = '2020-05-27'
ticker_preds = pd.DataFrame(columns=['ticker','pred_date','pred_price','curr_price','earn_ratio'])

iteration = 0

for ticker in tickers:

    pred_date, pred_price = fit_model(ticker,train_start_dt,train_end_dt,target=7)
    
    curr_price = port.get_price(date=pred_date,ticker=ticker)
    earn_ratio = pred_price / curr_price - 1
    
    ticker_preds = ticker_preds.append({'ticker':ticker,'pred_date':pred_date,'pred_price':pred_price,\
                                        'curr_price':curr_price,'earn_ratio':earn_ratio},ignore_index=True)
    
ticker_preds.head()

Unnamed: 0,ticker,pred_date,pred_price,curr_price,earn_ratio
0,AAPL,2020-06-05,77.789149,81.316468,-0.043378
1,MSFT,2020-06-05,180.554553,183.116951,-0.013993
2,CSCO,2020-06-05,48.236123,45.816011,0.052822
3,INTC,2020-06-05,62.773814,62.337197,0.007004
4,ORCL,2020-06-05,54.059331,52.649624,0.026775


In [33]:
# Creating algorithm to buy stocks
# Sorting highest to lowest to buy best performing stocks before money runs out
buy_threshold = .05
buy_stocks = ticker_preds[ticker_preds['earn_ratio'] >= buy_threshold].sort_values(by='earn_ratio',ascending=False)

buy_stocks

Unnamed: 0,ticker,pred_date,pred_price,curr_price,earn_ratio
34,NEM,2020-06-05,61.40575,52.532277,0.168915
25,KMI,2020-06-05,17.877736,15.553622,0.149426
44,T,2020-06-05,34.89018,30.430713,0.146545
15,PFE,2020-06-05,36.303281,32.933866,0.102309
16,GILD,2020-06-05,79.018232,72.898391,0.08395
49,PM,2020-06-05,76.384754,71.090334,0.074475
18,AMGN,2020-06-05,231.356784,216.269818,0.06976
2,CSCO,2020-06-05,48.236123,45.816011,0.052822
31,CF,2020-06-05,33.64237,31.976194,0.052107


In [34]:
# Establish weights for buying stocks
buy_ratios = buy_stocks.earn_ratio / buy_stocks.earn_ratio.sum()
buy_ratios

34    0.187619
25    0.165972
44    0.162772
15    0.113637
16    0.093246
49    0.082721
18    0.077485
2     0.058671
31    0.057877
Name: earn_ratio, dtype: float64

In [35]:
# Getting cash amount that can be used to buy
# Can only buy full shares
buy_order = dict(zip(buy_stocks.ticker, (port.current_cash * buy_ratios / buy_stocks.curr_price).astype('int')))
buy_order

{'NEM': 35,
 'KMI': 106,
 'T': 53,
 'PFE': 34,
 'GILD': 12,
 'PM': 11,
 'AMGN': 3,
 'CSCO': 12,
 'CF': 18}

In [36]:
# Buying stock and adding to portfolio
port.buy(buy_order,buy_stocks.pred_date.unique()[0])

In [37]:
# I believe this is not buying more, even though enough cash remains to buy at least 1 share of all,
# due to rounding INTs
port.current_cash

349.15964074014175

In [41]:
# Checking open positions
port.open_positions_dict.keys()

dict_keys(['NEM', 'KMI', 'T', 'PFE', 'GILD', 'PM', 'AMGN', 'CSCO', 'CF'])

In [88]:
# Need to work on so I can do proper selling
port.open_positions_df

Unnamed: 0,Date,Ticker,Quantity,Price
0,2020-06-05,NEM,35,52.532277
1,2020-06-05,KMI,106,15.553622
2,2020-06-05,T,53,30.430713
3,2020-06-05,PFE,34,32.933866
4,2020-06-05,GILD,12,72.898391
5,2020-06-05,PM,11,71.090334
6,2020-06-05,AMGN,3,216.269818
7,2020-06-05,CSCO,12,45.816011
8,2020-06-05,CF,18,31.976194


In [25]:
open_positions_dict

NameError: name 'open_positions_dict' is not defined

In [42]:
# Creating model of first iteration for all tickers selected above to sell (so second iteration)
train_start_dt = '2016-01-01'
next_train_end_dt = (pd.to_datetime(train_end_dt)+timedelta(7)).strftime("%Y-%m-%d")

ticker_preds = pd.DataFrame(columns=['ticker','pred_date','pred_price','curr_price','earn_ratio'])

for ticker in tickers:

    pred_date, pred_price = fit_model(ticker,train_start_dt,next_train_end_dt,target=7)

    curr_price = port.get_price(date=pred_date,ticker=ticker)
    earn_ratio = pred_price / curr_price - 1

    ticker_preds = ticker_preds.append({'ticker':ticker,'pred_date':pred_date,'pred_price':pred_price,\
                                        'curr_price':curr_price,'earn_ratio':earn_ratio},ignore_index=True)


In [43]:
# Establish weights for selling stocks - Not working
# How do we deal with cascading price reductions (e.g. only lose 5%, but time after time)
sell_threshold = -0.2
sell_stocks = ticker_preds[ticker_preds['earn_ratio'] <= sell_threshold]

sell_stocks

Unnamed: 0,ticker,pred_date,pred_price,curr_price,earn_ratio


In [44]:
sell_dict = {}

for ticker in sell_stocks.ticker.unique():
    try: # will try to add positions from open_positions dict to sell order
        sell_dict[ticker] = port.open_positions_df[port.open_positions_df['Ticker']==ticker]['Quantity'].values[0]
    except: # or pass if they do not exist
        pass

In [124]:
sell_dict

{}

In [125]:
port.sell(sell_dict,next_train_end_dt)

In [126]:
port.view_trade_history()

Unnamed: 0_level_0,Order Type,Ticker,Quantity,Ticker Value,Total Trade Value,Remaining Cash
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-06-05,buy,NEM,35,52.532277,1838.629701,8161.370299
2020-06-05,buy,KMI,106,15.553622,1648.683975,6512.686324
2020-06-05,buy,T,53,30.430713,1612.827791,4899.858533
2020-06-05,buy,PFE,34,32.933866,1119.751444,3780.107089
2020-06-05,buy,GILD,12,72.898391,874.780694,2905.326395
2020-06-05,buy,PM,11,71.090334,781.993673,2123.332722
2020-06-05,buy,AMGN,3,216.269818,648.809453,1474.523269
2020-06-05,buy,CSCO,12,45.816011,549.792135,924.731135
2020-06-05,buy,CF,18,31.976194,575.571494,349.159641


In [None]:
for ticker in tickers:

    pred_date, pred_price = fit_model(ticker,train_start_dt,train_end_dt,target=7)
    
    curr_price = port.get_price(date=pred_date,ticker=ticker)
    earn_ratio = pred_price / curr_price - 1
    
    ticker_preds = ticker_preds.append({'ticker':ticker,'pred_date':pred_date,'pred_price':pred_price,\
                                        'curr_price':curr_price,'earn_ratio':earn_ratio},ignore_index=True)

In [46]:
def create_sell_dict(ticker_preds,sell_threshold = -0.2):
    
    # Establish weights for selling stocks - Not working, only selling full positions
    # How do we deal with cascading price reductions (e.g. only lose 5%, but time after time)
    sell_stocks = ticker_preds[ticker_preds['earn_ratio'] <= sell_threshold]

    sell_dict = {}

    for ticker in sell_stocks.ticker.unique():
        try: # will try to add positions from open_positions dict to sell order
            sell_dict[ticker] = port.open_positions_df[port.open_positions_df['Ticker']==ticker]['Quantity'].values[0]
        except: # or pass if they do not exist
            pass

    return sell_dict    

In [47]:
def create_buy_dict(ticker_preds,buy_threshold=0.05):
    
    # Establishing weights to buy stocks
    buy_stocks = ticker_preds[ticker_preds['earn_ratio'] >= buy_threshold].sort_values(by='earn_ratio',ascending=False)

    buy_dict = {}

    # Establish weights for buying stocks
    buy_ratios = buy_stocks.earn_ratio / buy_stocks.earn_ratio.sum()

    # Getting cash amount that can be used to buy; Can only buy full shares
    buy_dict = dict(zip(buy_stocks.ticker, (port.current_cash * buy_ratios / buy_stocks.curr_price).astype('int')))

    # Removing keys where we are not buying any stock
    del_list = []
    for key in buy_dict.keys():
        if buy_dict[key] == 0:
            del_list.append(key)

    for key in del_list:
        del buy_dict[key]

    return buy_dict

In [49]:
# Putting everything above together
# This will assess prices and predictions every 7 days and change positions accordingly

# Initializing portfolio
start_dt = test_df.index.min().strftime('%Y-%m-%d')
port = portfolio(start_date=start_dt,value=10000)

# Creating model of first iteration for all tickers selected above
train_start_dt = '2016-01-01'
train_end_dt = '2020-05-27'

ticker_preds = pd.DataFrame(columns=['ticker','pred_date','pred_price','curr_price','earn_ratio'])

iteration = 0
rebal_interval = 7 # Rebalance (buy/sell) every 7 days

for i in range(0,len(test_df),rebal_interval):
    
    # Changing train end date for every iteration
    next_train_end_dt = (pd.to_datetime(train_end_dt)+timedelta(7)).strftime('%Y-%m-%d')

    # Re-fit every ticker model and predict price in the future
    for ticker in tickers:

        pred_date, pred_price = fit_model(ticker,train_start_dt,next_train_end_dt,target=7)

        curr_price = port.get_price(date=pred_date,ticker=ticker)
        earn_ratio = pred_price / curr_price - 1

        ticker_preds = ticker_preds.append({'ticker':ticker,'pred_date':pred_date,'pred_price':pred_price,\
                                        'curr_price':curr_price,'earn_ratio':earn_ratio},ignore_index=True)
    
    # Creating dictinary of stocks and positions to sell
    sell_dict = create_sell_dict(ticker_preds,sell_threshold = -0.05)
    if not sell_dict:
            pass
    else:
        print(f'Current Selected Sells: {sell_dict}')
        # Selling positions of stocks predicted to decline on day of training (EOD)
        port.sell(sell_dict,next_train_end_dt)
    
    # Creating dictinary of stocks and positions to buy
    buy_dict = create_buy_dict(ticker_preds,buy_threshold=0.05)
    if not buy_dict:
        pass
    else:
        print(f'Current Selected Buys: {buy_dict}')
        # Buying positions of stocks predicted to increase on day of training (EOD)
        port.buy(buy_dict,next_train_end_dt)

    print(port.open_positions_df)

Current Selected Buys: {'T': 29, 'KMI': 52, 'CF': 25, 'GILD': 9, 'MPC': 18, 'PM': 8, 'PFE': 17, 'BAC': 21, 'NRG': 14, 'GM': 16, 'NEM': 7, 'CSCO': 8, 'CSX': 15, 'INTC': 5, 'AES': 21, 'FE': 6, 'SBUX': 3, 'KO': 5, 'MO': 6, 'WMT': 1, 'BSX': 5, 'CTVA': 6, 'ORCL': 3, 'CMCSA': 4}
          Date Ticker Quantity       Price
0   2020-06-03      T       29   29.314095
1   2020-06-03    KMI       52   14.910682
2   2020-06-03     CF       25   30.783998
3   2020-06-03   GILD        9   70.397519
4   2020-06-03    MPC       18   36.718258
5   2020-06-03     PM        8   69.169729
6   2020-06-03    PFE       17   32.888313
7   2020-06-03    BAC       21   25.012320
8   2020-06-03    NRG       14   35.078771
9   2020-06-03     GM       16   28.717500
10  2020-06-03    NEM        7   54.888612
11  2020-06-03   CSCO        8   45.111666
12  2020-06-03    CSX       15   24.367440
13  2020-06-03   INTC        5   60.505828
14  2020-06-03    AES       21   13.130348
15  2020-06-03     FE        6   41.39

KeyboardInterrupt: 

In [28]:
port.view_trade_history().tail()

Unnamed: 0_level_0,Order Type,Ticker,Quantity,Ticker Value,Total Trade Value,Remaining Cash
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-06-03,buy,BSX,0,37.7325,0.0,307.263049
2020-06-03,buy,AMGN,0,215.664436,0.0,307.263049
2020-06-03,buy,CTVA,0,28.040047,0.0,307.263049
2020-06-03,buy,ORCL,0,52.489764,0.0,307.263049
2020-06-03,buy,CMCSA,0,40.434306,0.0,307.263049


#### Action Items
1. Need to create logic to trade on actual trading days (see Exploration v2 notebook)
2. Open positions is not removing positions from the df
3. If no cash remains, process still "buys" zero shares

In [170]:
di

if {}:
    print('empty')