# This script produces 5 accuracy measurements for each stock by training the LSTM 5 separate times per stock and making predictions with each trained model
* Accuracy is measured by comparing how the trading algorithm would have performed over different time intervals (1 and 2 years) against simply buying and holding the stock; the full analysis is in stocks.LSTM

In [8]:
import pandas as pd
import numpy as np
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.layers import Bidirectional


# Plot appearance: seaborn's 'whitegrid' theme is applied first, then
# matplotlib's "fivethirtyeight" style sheet on top of it.
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
# IPython magic, not Python syntax: only valid inside a Jupyter/IPython
# session, where it asks for figures to be rendered inline in the notebook.
%matplotlib inline

# Patch pandas_datareader so that the pdr.get_data_yahoo(...) call used below
# is served by yfinance.
# NOTE(review): newer yfinance releases are reported to deprecate/remove
# pdr_override() - confirm against the installed version.
yf.pdr_override()


# Ticker symbols to train on and back-test, in processing order.
# Each symbol appears exactly once: the list previously contained 'JPM' twice,
# which trained that ticker a second time and double-counted it in any
# per-stock summary.
tech_list = [
    'TSLA', 'AAPL', 'GE', 'MARA', 'SRPT', 'ABR', 'MSFT', 'AMZN', 'JPM', 'NVDA',
    'JNJ', 'IBM', 'KO', 'VZ', 'WMT', 'PG', 'CVX', 'PEP', 'MMM', 'XOM',
    'BA', 'CSCO', 'INTC', 'RTX', 'CAT', 'GS', 'HD', 'NKE', 'DIS',
]


# --- Experiment settings ----------------------------------------------------
HISTORY_START = '2014-04-01'  # first date of the downloaded price history
TRAIN_FRACTION = .89          # share of rows used for training; the rest is validation
WINDOW = 60                   # number of previous closes fed to the model per sample
RUNS_PER_STOCK = 5            # independent trainings (and back-tests) per ticker
INITIAL_INVESTMENT = 100      # starting capital for both strategies

RESULT_COLUMNS = ['Date', 'Action', 'Investment', 'Buy & Hold',
                  'Actual Close', 'Predicted Close']


def make_windows(scaled_series, window=WINDOW):
    """Cut a scaled price column into overlapping look-back windows.

    Args:
        scaled_series: 2-D array of shape (rows, 1); only column 0 is read.
        window: number of consecutive rows per sample.

    Returns:
        Array of shape (rows - window, window, 1). Sample ``k`` holds rows
        ``k .. k + window - 1``, i.e. the values that precede row
        ``k + window``. Empty (0 samples) when there are not more rows than
        ``window``.
    """
    sample_count = len(scaled_series) - window
    if sample_count <= 0:
        return np.empty((0, window, 1))
    windows = np.array([scaled_series[end - window:end, 0]
                        for end in range(window, len(scaled_series))])
    return windows.reshape(sample_count, window, 1)


def build_model(window=WINDOW):
    """Build and compile the two-layer bidirectional LSTM regressor."""
    model = Sequential()
    model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(window, 1)))
    model.add(Bidirectional(LSTM(64, return_sequences=False)))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


def simulate_trading(actual_close, predicted_close, initial_investment=INITIAL_INVESTMENT):
    """Back-test the prediction-driven strategy against buy & hold.

    On each day except the last: if the predicted close does not fall from
    today to tomorrow and nothing is held, buy with all capital at today's
    actual close; if it does not rise and a position is held, sell everything
    at today's actual close; otherwise hold.

    Args:
        actual_close: pandas Series of actual closing prices.
        predicted_close: pandas Series of predicted closes, aligned
            element-by-element with ``actual_close``.
        initial_investment: starting capital for both strategies.

    Returns:
        DataFrame with one row per simulated day (the last day is not
        simulated because it has no next-day prediction) and the columns
        listed in ``RESULT_COLUMNS``. Empty when fewer than two predictions
        are supplied.
    """
    # Work on plain arrays: integer keys on a date-indexed Series rely on a
    # deprecated positional fallback in pandas.
    dates = actual_close.index
    actual = actual_close.to_numpy()
    predicted = predicted_close.to_numpy()

    if len(predicted) < 2:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    first_price = actual[0]
    capital = initial_investment
    position = 0
    records = []

    # Zipping against predicted[1:] stops one day early, so every iteration
    # has a next-day prediction to compare with.
    for date, price, predicted_today, predicted_next in zip(
            dates, actual, predicted, predicted[1:]):
        if predicted_today <= predicted_next and position == 0:
            position = capital / price
            capital = 0
            action = "Buy"
        elif predicted_today >= predicted_next and position > 0:
            capital = position * price
            position = 0
            action = "Sell"
        else:
            action = "Hold"

        total_investment = capital + (position * price)
        buy_and_hold = initial_investment * (price / first_price)

        records.append((date,
                        action,
                        round(total_investment, 2),
                        round(buy_and_hold, 2),
                        round(price, 2),
                        round(predicted_today, 2)))

    return pd.DataFrame(records, columns=RESULT_COLUMNS)


# In a notebook (or when run as a script) __name__ is "__main__", so the
# back-test still runs as before; the guard only keeps it from starting when
# the helpers above are imported. The pipeline is kept flat at module level so
# the last run's model, valid and results_df stay inspectable afterwards.
if __name__ == "__main__":
    for stock_symbol in tech_list:
        print(stock_symbol)
        print("\n")

        # Download and pre-process once per ticker: none of this changes
        # between the repeated trainings below.
        df = pdr.get_data_yahoo(stock_symbol, start=HISTORY_START, end=datetime.now())
        close_prices = df.filter(['Close'])
        dataset = close_prices.values
        training_data_len = int(np.ceil(len(dataset) * TRAIN_FRACTION))

        # Need more training rows than one window, and at least two
        # validation rows for the trading simulation to produce a result.
        if training_data_len <= WINDOW or len(dataset) - training_data_len < 2:
            print(f"Skipping {stock_symbol}: not enough price history ({len(dataset)} rows)")
            continue

        # Fit the scaler on the training rows only, so the validation prices
        # do not leak into the scaling the model is trained with.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaler.fit(dataset[:training_data_len])
        scaled_data = scaler.transform(dataset)

        x_train = make_windows(scaled_data[:training_data_len], WINDOW)
        y_train = scaled_data[WINDOW:training_data_len, 0]

        # The test windows start WINDOW rows before the split so the first
        # validation day already has a full look-back window.
        x_test = make_windows(scaled_data[training_data_len - WINDOW:], WINDOW)
        y_test = dataset[training_data_len:, :]

        train = close_prices[:training_data_len]

        for run in range(RUNS_PER_STOCK):
            # Train a fresh model for every run.
            model = build_model(WINDOW)
            model.fit(x_train, y_train, batch_size=1, epochs=1)

            # Predicted prices, converted back from the scaled range.
            predictions = scaler.inverse_transform(model.predict(x_test))

            # Root mean squared error (RMSE) on the validation rows.
            rmse = np.sqrt(np.mean((predictions - y_test) ** 2))

            valid = close_prices[training_data_len:].copy()
            valid['Predictions'] = predictions

            # Visualize the data
            plt.figure(figsize=(16, 6))
            plt.title('Model')
            plt.xlabel('Date', fontsize=18)
            plt.ylabel('Close Price USD ($)', fontsize=18)
            plt.plot(train['Close'])
            plt.plot(valid[['Close', 'Predictions']])
            plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
            # Render the figure now instead of leaving one more figure open
            # per run until the whole loop has finished.
            plt.show()

            results_df = simulate_trading(valid['Close'], valid['Predictions'],
                                          INITIAL_INVESTMENT)

            # Apply formatting to the DataFrame
            styled_results_df = results_df.style.format({'Date': '{:%Y-%m-%d}',
                                                         'Investment': '{:.2f}',
                                                         'Buy & Hold': '{:.2f}',
                                                         'Actual Close': '{:.2f}',
                                                         'Predicted Close': '{:.2f}'})

            final_investment = results_df.iloc[-1]['Investment']
            final_buy_and_hold = results_df.iloc[-1]['Buy & Hold']

            print(f"Final Investment: {final_investment:.2f}")
            print(f"Final Buy & Hold: {final_buy_and_hold:.2f}")





TSLA


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


KeyboardInterrupt: 