In [1]:
#All imports

import yfinance as yf
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sqlite3
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from datetime import datetime
import mplfinance as mpf

In [2]:
def plot_candle(symbol, interval, period):
    """Download price history for ``symbol`` and draw it as a candlestick chart.

    Parameters:
    - symbol: Ticker symbol passed to ``yf.Ticker``
    - interval: Bar interval forwarded to ``history``
    - period: Look-back period forwarded to ``history``

    The history is requested with ``prepost=True`` and rendered with
    mplfinance's 'charles' style. Nothing is returned.
    """
    instrument = yf.Ticker(symbol)
    price_history = instrument.history(period=period, interval=interval, prepost=True)
    mpf.plot(price_history, type='candle', style='charles', ylabel='Price')

In [3]:
def plot_line(symbol, interval, period):
    """Download price history for ``symbol`` and draw it as a line chart.

    Parameters:
    - symbol: Ticker symbol passed to ``yf.Ticker``
    - interval: Bar interval forwarded to ``history``
    - period: Look-back period forwarded to ``history``

    The history is requested with ``prepost=True`` and rendered with
    mplfinance's 'charles' style. Nothing is returned.
    """
    instrument = yf.Ticker(symbol)
    price_history = instrument.history(period=period, interval=interval, prepost=True)
    mpf.plot(price_history, type='line', style='charles', ylabel='Price')

In [4]:
def create_ticker(symbol, interval, period):
    """Download price history for ``symbol`` as a flat pandas DataFrame.

    Parameters:
    - symbol: Ticker symbol passed to ``yf.Ticker``
    - interval: Bar interval forwarded to ``history``
    - period: Look-back period forwarded to ``history``

    Returns:
    - DataFrame whose former index (the timestamps) has been moved into an
      ordinary column, leaving a default integer index.
    """
    price_history = yf.Ticker(symbol).history(period=period, interval=interval, prepost=True)
    # Move the timestamp index into a regular column so it can be selected by name
    return price_history.reset_index()

In [5]:
# Sample download used by the rest of the notebook: AAPL, interval '1d', period '1y'
ticker = create_ticker(symbol="AAPL", interval='1d', period='1y')

In [8]:
def add_sma(ticker, windows=(20, 50, 200)):
    """Add simple moving averages of the 'Close' column.

    Parameters:
    - ticker: DataFrame with a 'Close' column; it is modified in place
    - windows: Iterable of window lengths (in rows). One column named
      ``SMA_<window>`` is added per entry. Defaults to 20, 50 and 200.

    Returns:
    - The same DataFrame object, with the new columns attached.

    Note: ``rolling(window=w).mean()`` is NaN for the first ``w - 1`` rows,
    so a window longer than the frame yields an all-NaN column.
    """
    for window in windows:
        ticker[f'SMA_{window}'] = ticker['Close'].rolling(window=window).mean()
    return ticker

In [9]:
def add_emv(ticker, spans=(20, 50, 200)):
    """Add exponential moving averages of the 'Close' column.

    Parameters:
    - ticker: DataFrame with a 'Close' column; it is modified in place
    - spans: Iterable of EMA spans. One column named ``EMA_<span>`` is added
      per entry. Defaults to 20, 50 and 200.

    Returns:
    - The same DataFrame object, with the new columns attached.

    The averages use ``adjust=False``, i.e. the recursive form
    ``y[t] = (1 - a) * y[t-1] + a * x[t]`` with ``a = 2 / (span + 1)``,
    seeded with the first observation, so no leading NaNs are produced.
    """
    for span in spans:
        ticker[f'EMA_{span}'] = ticker['Close'].ewm(span=span, adjust=False).mean()
    return ticker


In [10]:
def separate_date(ticker):
    """Return the timestamp column of ``ticker`` as datetimes, for later plotting.

    Parameters:
    - ticker: DataFrame holding the timestamps in a 'Date' column, or in a
      'Datetime' column when 'Date' is absent

    Returns:
    - The timestamp column converted with ``pd.to_datetime``

    Raises:
    - KeyError: if neither column is present
    """
    # NOTE(review): yfinance is understood to name the index 'Date' for daily
    # bars and 'Datetime' for intraday bars; 'Date' is tried first so existing
    # daily-data behaviour is unchanged.
    for column in ('Date', 'Datetime'):
        if column in ticker:
            return pd.to_datetime(ticker[column])
    raise KeyError("Date")

In [11]:
# Attach the indicator columns: simple moving averages first, then exponential ones
ticker = add_emv(add_sma(ticker))

In [20]:
def split_data(ratio, data=None, columns=None):
    """Split a frame chronologically into training and testing feature sets.

    Parameters:
    - ratio: Fraction of rows (counted from the top) that goes to training
    - data: DataFrame to split; defaults to the notebook-level ``ticker``
    - columns: Columns to keep; defaults to the notebook-level ``cols``

    Returns:
    - df_train: First ``int(len(data) * ratio)`` rows, selected columns, as float
    - df_test: Remaining rows, selected columns, as float
    """
    # Fall back to the notebook globals so existing split_data(ratio) calls still work
    frame = ticker if data is None else data
    selected = cols if columns is None else columns

    # Row position where the test set begins (rows are not shuffled)
    split_index = int(len(frame) * ratio)

    df_train = frame.iloc[:split_index][selected].astype(float)
    df_test = frame.iloc[split_index:][selected].astype(float)

    return df_train, df_test


In [21]:
def normalise(df_train, df_test, return_scaler=False):
    """Standardise the train and test features with a scaler fitted on train only.

    Parameters:
    - df_train: Training features; the StandardScaler is fitted on these
    - df_test: Testing features; transformed with the training statistics
    - return_scaler: When True, also return the fitted scaler so predictions
      can later be mapped back with ``inverse_transform``

    Returns:
    - df_train_scaled, df_test_scaled: Scaled arrays
    - scaler: The fitted StandardScaler (only when ``return_scaler`` is True)
    """
    # Fit on the training split only, so no test-set statistics leak into scaling
    scaler = StandardScaler().fit(df_train)
    df_train_scaled = scaler.transform(df_train)
    df_test_scaled = scaler.transform(df_test)

    if return_scaler:
        return df_train_scaled, df_test_scaled, scaler
    return df_train_scaled, df_test_scaled

In [30]:
def preprocess_data(data, n_past, n_future, target):
    """
    Preprocess the data into the sliding-window format required for LSTM.

    Parameters:
    - data: Scaled 2-D data (rows = time steps, columns = features)
    - n_past: Number of past rows used as input for each sample
    - n_future: How many rows ahead of the window the label is taken from
      (1 = the row immediately after the window)
    - target: Column index (or index expression) of ``data`` used as the label

    Returns:
    - X: Input data for LSTM, shape (samples, n_past, features)
    - Y: Output data for LSTM, shape (samples, 1) for an integer ``target``

    When ``data`` is too short to form any window, correctly shaped empty
    arrays are returned rather than 1-D empty arrays, so ``X.shape[1]`` and
    ``X.shape[2]`` remain valid downstream.
    """
    # Accept DataFrames / lists as well as ndarrays; tuple slicing needs an array
    data = np.asarray(data)
    X, Y = [], []

    for i in range(n_past, len(data) - n_future + 1):
        # Window covers rows [i - n_past, i); label is row i + n_future - 1
        X.append(data[i - n_past:i, :])
        Y.append(data[i + n_future - 1:i + n_future, target])

    if not X:
        # Mirror the shapes a non-empty result would have, with zero samples
        empty_X = np.empty((0, max(n_past, 0)) + data.shape[1:], dtype=data.dtype)
        empty_Y = np.empty((0, 1) + data[:0, target].shape[1:], dtype=data.dtype)
        return empty_X, empty_Y

    return np.array(X), np.array(Y)

In [17]:
# Columns kept by split_data; their order fixes the column positions used downstream
cols = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'EMA_20', 'EMA_50', 'EMA_200',
]

In [24]:
n_past = 30    # Look-back window: number of past days used for each prediction
n_future = 3   # Forecast horizon: number of days ahead we want to predict


In [25]:
def preprocess_data_p2(df_train_scaled, df_test_scaled, n_past, n_future, target=0):
    """Build LSTM windows for both the training and the testing split.

    Parameters:
    - df_train_scaled: Scaled training data
    - df_test_scaled: Scaled testing data
    - n_past: Number of past rows used as input for each sample
    - n_future: How many rows ahead the label is taken from
    - target: Column index of the scaled data used as the label. Defaults to
      0, the first column of the scaled matrix. NOTE(review): pass the index
      of the column you actually want to forecast (e.g. the position of
      'Close' in the feature list) — confirm the intended default.

    Returns:
    - trainX, trainY, testX, testY as produced by ``preprocess_data``
    """
    # preprocess_data requires `target`; forward it so both splits share one label column
    trainX, trainY = preprocess_data(df_train_scaled, n_past, n_future, target)
    testX, testY = preprocess_data(df_test_scaled, n_past, n_future, target)
    return trainX, trainY, testX, testY

In [27]:
def fit_LSTM(trainX, trainY, testX, testY, epochs=5, batch_size=16):
    """Build, train and return a two-layer LSTM regressor, plotting its loss curves.

    Parameters:
    - trainX: Training inputs, indexed as (samples, time steps, features)
    - trainY: Training labels; ``trainY.shape[1]`` sets the output width
    - testX, testY: Accepted for interface compatibility but not used here;
      validation uses a split of the training data instead
    - epochs: Number of training epochs (default 5)
    - batch_size: Mini-batch size (default 16)

    Returns:
    - The fitted Keras model, so callers can run ``model.predict`` afterwards
    """
    n_timesteps, n_features = trainX.shape[1], trainX.shape[2]

    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it
    model.add(LSTM(64, activation='relu', input_shape=(n_timesteps, n_features), return_sequences=True))
    model.add(LSTM(32, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(trainY.shape[1]))

    model.compile(optimizer='adam', loss='mse')
    model.summary()

    # Fit the model; Keras holds out 10% of the training samples for validation
    history = model.fit(trainX, trainY, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1)

    plt.plot(history.history['loss'], label='Training loss')
    plt.plot(history.history['val_loss'], label='Validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('MSE loss')
    plt.legend()
    plt.show()

    # Hand the trained model back; previously it was discarded when the function returned
    return model

In [31]:
def predict(days_for_prediction, target, fitted_model=None, inputs=None, fitted_scaler=None):
    """Predict from the last input windows and rescale to the original units.

    Parameters:
    - days_for_prediction: Number of trailing windows to predict from. Note
      that 0 selects *all* windows, because ``x[-0:]`` is the whole array.
    - target: Column index whose scaling is undone on the predictions
    - fitted_model: Object with a ``predict`` method; defaults to the
      notebook-level ``model``
    - inputs: Window array; defaults to the notebook-level ``testX``
    - fitted_scaler: Fitted scaler with ``inverse_transform`` and
      ``n_features_in_``; defaults to the notebook-level ``scaler``

    Returns:
    - 1-D array of predictions for column ``target`` in the original scale

    Assumes the model emits one value per window (shape (n, 1)) — TODO confirm
    if the network is ever given a wider output layer.
    """
    # Fall back to notebook globals so existing predict(days, target) calls still work
    net = model if fitted_model is None else fitted_model
    windows = testX if inputs is None else inputs
    rescaler = scaler if fitted_scaler is None else fitted_scaler

    prediction = net.predict(windows[-days_for_prediction:])

    # The scaler was fitted on every feature column, so inverse_transform needs
    # that many columns: tile the single prediction across all of them, undo
    # the scaling, then keep only the target column.
    n_features = rescaler.n_features_in_
    prediction_copies = np.repeat(prediction, n_features, axis=-1)
    y_pred_future = rescaler.inverse_transform(prediction_copies)[:, target]
    return y_pred_future