In [18]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
# Use the MinMaxScaler to scale data between 0 and 1.
from sklearn.preprocessing import MinMaxScaler

from datetime import datetime, timedelta

import os
import plotly.express as px
import pandas as pd
import numpy as np
import hvplot.pandas
import alpaca_trade_api as tradeapi
from pathlib import Path
from pandas_datareader import data as wb
import matplotlib.pyplot as plt
import seaborn as sns

# Import required Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load .env environment variables
from dotenv import load_dotenv
load_dotenv("classkeys.env")

%matplotlib inline

In [19]:
# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2"
)

In [20]:
def window_data(df, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(df) - window):
        features = df.iloc[i : (i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [21]:
# Set the Stock ticker
tickers = ["FSR"]

In [24]:
def run_LSTM(t):
    timestamp = datetime.now()
    print(f"Start running model for {t}")
    # Set the Stock ticker
    tickers = [t]

    # Set timeframe to '1D'
    timeframe = "1D"

    # Set start and end datetimes
    start_date = pd.Timestamp("2022-01-11", tz="America/New_York").isoformat()
    end_date = pd.Timestamp("2022-01-21", tz="America/New_York").isoformat()

    # Get data for ticker
    fsr_df = alpaca.get_barset(
        tickers,
        timeframe,
        start=start_date,
        end=end_date,
        limit=1000,
    ).df

    df = fsr_df

    # Creating the features (X) and target (y) data using the window_data() function.
    window_size = 5

    feature_column = 2
    target_column = 2
    X, y = window_data(df, window_size, feature_column, target_column)
    print (f"X sample values:\n{X[:5]} \n")
    print (f"y sample values:\n{y[:5]}")

    # Use 70% of the data for training and the remainder for testing
    split = int(0.7 * len(X))
    X_train = X[: split]
    X_test = X[split:]
    y_train = y[: split]
    y_test = y[split:]


    # Create a MinMaxScaler object
    scaler = MinMaxScaler()

    # Fit the MinMaxScaler object with the training feature data X_train
    scaler.fit(X_train)

    # Scale the features training and testing sets
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Fit the MinMaxScaler object with the training target data y_train
    scaler.fit(y_train)

    # Scale the target training and testing sets
    y_train = scaler.transform(y_train)
    y_test = scaler.transform(y_test)

    # Reshape the features for the model
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    print (f"X_train sample values:\n{X_train[:5]} \n")
    print (f"X_test sample values:\n{X_test[:5]}")


    # Define the LSTM RNN model.
    model = Sequential()

    number_units = 5
    dropout_fraction = 0.2

    # Layer 1
    model.add(LSTM(
        units=number_units,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1))
        )
    model.add(Dropout(dropout_fraction))
    # Layer 2
    model.add(LSTM(units=number_units, return_sequences=True))
    model.add(Dropout(dropout_fraction))
    # Layer 3
    model.add(LSTM(units=number_units))
    model.add(Dropout(dropout_fraction))
    # Output layer
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer="adam", loss="mean_squared_error")
    # Train the model
    model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)


    # Evaluate the model
    model.evaluate(X_test, y_test)

    # Make some predictions
    predicted = model.predict(X_test)
    # Recover the original prices instead of the scaled version
    predicted_prices = scaler.inverse_transform(predicted)
    real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

    # Create a DataFrame of Real and Predicted values
    stocks = pd.DataFrame({
        "Real": real_prices.ravel(),
        "Predicted": predicted_prices.ravel()
        }, index = df.index[-len(real_prices): ])
    stocks.head()
    
    print(f"Prediction for {t} is {predicted}.")
    print("Run time: ", datetime.now() - timestamp)
    return stocks, predicted

In [26]:
# demo of the function, just to run once
stockframe, prediction = run_LSTM("FSR")

Start running model for FSR
X sample values:
[[15.16   15.4    14.98   14.2609 14.08  ]
 [15.4    14.98   14.2609 14.08   13.39  ]
 [14.98   14.2609 14.08   13.39   12.7   ]] 

y sample values:
[[13.39]
 [12.7 ]
 [11.93]]
X_train sample values:
[[[0.]
  [1.]
  [1.]
  [1.]
  [1.]]

 [[1.]
  [0.]
  [0.]
  [0.]
  [0.]]] 

X_test sample values:
[[[-0.75      ]
  [-1.71214286]
  [-0.25156446]
  [-3.81426202]
  [-1.        ]]]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Prediction for FSR is [[-0.00347471]].
Run time:  0:00:07.049760


In [83]:
# Set the Stock ticker
tickers = ["AMZN" , "TWTR" , "GOOGL" , "FB" , "MSFT" , "AAPL" , "TSLA" , "FSR" , "NVDA" , "INTC"]

# Set timeframe to '1D'
timeframe = "1D"

# Set start and end datetimes
start_date = pd.Timestamp("2020-01-02", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2022-01-21", tz="America/New_York").isoformat()

# Get data for aamzn ticker
stock_df = alpaca.get_barset(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
    limit=1000,
).df
stock_df.head()

# Daily  returns contain closing price of all stock
nvda_closing_prices = pd.DataFrame()

# fetch closing prices
nvda_closing_prices["NVDA"] = stock_df["NVDA"]["close"]

# Drop the time component of the date
nvda_closing_prices.index = nvda_closing_prices.index.date

# Compute daily returns
nvda_daily_returns = nvda_closing_prices.pct_change().dropna()
nvda_daily_returns.head()

# Create and empty DataFrame for closing prices
amzn_closing_prices = pd.DataFrame()

# fetch closing prices
amzn_closing_prices["AMZN"] = stock_df["AMZN"]["close"]

# Drop the time component of the date
amzn_closing_prices.index = amzn_closing_prices.index.date

# Compute daily returns
amzn_daily_returns = amzn_closing_prices.pct_change().dropna()
amzn_daily_returns.head()

# Create and empty DataFrame for closing prices
aapl_closing_prices = pd.DataFrame()

# fetch closing prices
aapl_closing_prices["AAPL"] = stock_df["AAPL"]["close"]

# Drop the time component of the date
aapl_closing_prices.index = aapl_closing_prices.index.date

# Compute daily returns
aapl_daily_returns = aapl_closing_prices.pct_change().dropna()
aapl_daily_returns.head()

# Create and empty DataFrame for closing prices
tsla_closing_prices = pd.DataFrame()

# fetch closing prices
tsla_closing_prices["TSLA"] = stock_df["TSLA"]["close"]

# Drop the time component of the date
tsla_closing_prices.index = tsla_closing_prices.index.date

# Compute daily returns
tsla_daily_returns = tsla_closing_prices.pct_change().dropna()
tsla_daily_returns.head()

# Create and empty DataFrame for closing prices
googl_closing_prices = pd.DataFrame()

# fetch closing prices
googl_closing_prices["GOOGL"] = stock_df["GOOGL"]["close"]

# Drop the time component of the date
googl_closing_prices.index = googl_closing_prices.index.date

# Compute daily returns
googl_daily_returns = googl_closing_prices.pct_change().dropna()
googl_daily_returns.head()

# Create and empty DataFrame for closing prices
fb_closing_prices = pd.DataFrame()

# fetch closing prices
fb_closing_prices["FB"] = stock_df["FB"]["close"]

# Drop the time component of the date
fb_closing_prices.index = fb_closing_prices.index.date

# Compute daily returns
fb_daily_returns =fb_closing_prices.pct_change().dropna()
fb_daily_returns.head()

# Create and empty DataFrame for closing prices
msft_closing_prices = pd.DataFrame()

# fetch closing prices
msft_closing_prices["MSFT"] = stock_df["MSFT"]["close"]

# Drop the time component of the date
msft_closing_prices.index = msft_closing_prices.index.date

# Compute daily returns
msft_daily_returns = msft_closing_prices.pct_change().dropna()
msft_daily_returns.head()

# Create and empty DataFrame for closing prices
twtr_closing_prices = pd.DataFrame()

# fetch closing prices
twtr_closing_prices["TWTR"] = stock_df["TWTR"]["close"]

# Drop the time component of the date
twtr_closing_prices.index =twtr_closing_prices.index.date

# Compute daily returns
twtr_daily_returns = twtr_closing_prices.pct_change().dropna()
twtr_daily_returns.head()

# Create and empty DataFrame for closing prices
fsr_closing_prices = pd.DataFrame()

# fetch closing prices
fsr_closing_prices["FSR"] = stock_df["FSR"]["close"]

# Drop the time component of the date
fsr_closing_prices.index =fsr_closing_prices.index.date

# Compute daily returns
fsr_daily_returns = fsr_closing_prices.pct_change().dropna()
fsr_daily_returns.head()

# Create and empty DataFrame for closing prices
intc_closing_prices = pd.DataFrame()

# fetch closing prices
intc_closing_prices["INTC"] = stock_df["INTC"]["close"]

# Drop the time component of the date
intc_closing_prices.index =intc_closing_prices.index.date

# Compute daily returns
intc_daily_returns = intc_closing_prices.pct_change().dropna()
intc_daily_returns.head()


stock_daily_returns_df = pd.concat([intc_daily_returns,fsr_daily_returns, twtr_daily_returns, msft_daily_returns, aapl_daily_returns, amzn_daily_returns, googl_daily_returns, tsla_daily_returns, fb_daily_returns, nvda_daily_returns], axis=1, join="inner")

# Calculating volatility
volatility = stock_daily_returns_df.std().sort_values()

# Splitting up the list of stocks by sorted volatilities
split = round(len(volatility) / 3) 

high = volatility[len(volatility) - split:]
mid = volatility[len(volatility) - 2 * split :len(volatility) - split]
low = volatility[:len(volatility) - 2 * split]

mid_list = mid.index.tolist()
print(mid_list)

high_list = high.index.tolist()
print(high_list)

low_list = low.index.tolist()
print(low_list)


['FB', 'INTC', 'TWTR']
['TSLA', 'NVDA', 'FSR']
['MSFT', 'GOOGL', 'AMZN', 'AAPL']


In [38]:
tech_stocks = ["AMZN" , "TWTR" , "GOOGL" , "FB" , "MSFT" , "AAPL" , "TSLA" , "FSR" , "NVDA" , "INTC"]


In [39]:
Collection = dict()

for i in tech_stocks:
    stockframe, prediction = run_LSTM(i)
    Collection[i] = [stockframe, prediction]

Start running model for AMZN
X sample values:
[[3215.28 3288.34 3221.82 3196.01 3153.29]
 [3288.34 3221.82 3196.01 3153.29 3125.  ]
 [3221.82 3196.01 3153.29 3125.   3027.02]] 

y sample values:
[[3125.   ]
 [3027.02 ]
 [2841.408]]
X_train sample values:
[[[0.]
  [1.]
  [1.]
  [1.]
  [1.]]

 [[1.]
  [0.]
  [0.]
  [0.]
  [0.]]] 

X_test sample values:
[[[ 0.08951547]
  [-0.38800361]
  [-1.65517241]
  [-0.6622191 ]
  [-3.46341463]]]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Prediction for AMZN is [[0.07799679]].
Run time:  0:00:07.266029
Start running model for TWTR
X sample values:
[[39.635 39.77  38.61  37.51  37.13 ]
 [39.77  38.61  37.51  37.13  37.005]
 [38.61  37.51  37.13  37.005 37.195]] 

y sample values:
[[37.005]
 [37.195]
 [34.795]]
X_train sample values:
[[[0.]
  [1.]
  [1.]
  [1.]
  [1.]]

 [[1.]
  [0.]
  [0.]
  [0.]
  [0.]]] 

X_test sample values:
[[[-7.59259259]
  [-0.94827586]
  [-0.34545455]
  [-0.328

In [40]:
for key in Collection:
    print(key)

AMZN
TWTR
GOOGL
FB
MSFT
AAPL
TSLA
FSR
NVDA
INTC


In [41]:
Collection['MSFT'][0]

Unnamed: 0_level_0,Real,Predicted
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-21 00:00:00-05:00,295.61,301.153412


In [48]:
extract_frames = []

for i in Collection:
    df = Collection[i][0].copy()
    df['ticker'] = i
    extract_frames.append(df)
    

In [50]:
result_df = pd.concat(extract_frames)

In [57]:
result_df.rename(columns = {
    'Real' : "Today's Actuals",
    'Predicted' : "Tomorrow's Prediction",
}, inplace=True)

In [60]:
result_df['Predicted Returns'] = (result_df["Tomorrow's Prediction"] - result_df["Today's Actuals"]) / result_df["Today's Actuals"]

In [73]:
result_df = result_df.reset_index()
result_df.head(5)

Unnamed: 0,time,Today's Actuals,Tomorrow's Prediction,ticker,Predicted Returns
0,2022-01-21 00:00:00-05:00,2841.408,3034.662109,AMZN,0.068014
1,2022-01-21 00:00:00-05:00,34.795,36.994144,TWTR,0.063203
2,2022-01-21 00:00:00-05:00,2601.73,2660.546387,GOOGL,0.022607
3,2022-01-21 00:00:00-05:00,303.04,316.069183,FB,0.042995
4,2022-01-21 00:00:00-05:00,295.61,301.153412,MSFT,0.018752


In [75]:
location = result_df[result_df['Predicted Returns'] == result_df['Predicted Returns'].max()].index[0]

In [76]:
ticker_suggestion = result_df.loc[location, "ticker"]

In [79]:
print(f"Based on your risk tolerance from our survey analytics, we are recommending you to buy {ticker_suggestion} now and sell it tomorrow.")

Based on your risk tolerance from our survey analytics, we are recommending you to buy FSR now and sell it tomorrow.
