In [14]:
!pip install yfinance numpy tensorflow ta



In [12]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, Input
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [13]:
# Function to download data and add technical indicators
def download_and_structure(ticker, start_date="2018-01-01", end_date="2021-12-31"):
    """Download price history for ``ticker`` and append target/indicator columns.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``yf.download``.
    start_date, end_date : str, optional
        Forwarded to ``yf.download`` as ``start`` / ``end``. The defaults are
        the values that used to be hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        The downloaded columns (former index exposed as a regular column via
        ``reset_index``) plus ``target`` (next row's ``Close``) and the
        indicator columns ``MA30``, ``MA7``, ``RSI``, ``EMA50``, ``MACD``,
        ``Bollinger_High``, ``Bollinger_Low``, ``Stochastic_Oscillator``,
        ``Williams_R`` and ``OBV``. Rows containing any missing value are
        dropped, which removes the indicator warm-up rows and the final row
        (its ``target`` has no following close).

    Raises
    ------
    ValueError
        If the download returned no rows.
    """
    # Download data
    data = yf.download(ticker, start=start_date, end=end_date)
    if data.empty:
        raise ValueError(
            f"No price data returned for {ticker!r} between {start_date} and {end_date}."
        )

    # Some yfinance releases return two-level (field, ticker) columns even for a
    # single ticker; data['Close'] is then a DataFrame and the ta indicators
    # below cannot consume it. Keep only the field level.
    # NOTE(review): assumes level 0 is the price field (default column grouping) - confirm.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    data = data.reset_index(drop=False)

    close, high, low, volume = data['Close'], data['High'], data['Low'], data['Volume']

    # Create the target: next day's closing price
    data['target'] = close.shift(-1)

    # Adding technical indicators
    data['MA30'] = close.rolling(window=30).mean()
    data['MA7'] = close.rolling(window=7).mean()
    data['RSI'] = ta.momentum.RSIIndicator(close).rsi()
    data['EMA50'] = close.ewm(span=50, adjust=False).mean()
    data['MACD'] = ta.trend.MACD(close).macd_diff()

    # Build the Bollinger indicator once and read both bands from it.
    bollinger = ta.volatility.BollingerBands(close)
    data['Bollinger_High'] = bollinger.bollinger_hband()
    data['Bollinger_Low'] = bollinger.bollinger_lband()

    data['Stochastic_Oscillator'] = ta.momentum.StochasticOscillator(high, low, close).stoch()
    data['Williams_R'] = ta.momentum.WilliamsRIndicator(high, low, close).williams_r()
    data['OBV'] = ta.volume.OnBalanceVolumeIndicator(close, volume).on_balance_volume()

    # Drop rows with missing values
    data = data.dropna()

    return data

# Prepare data for LSTM with feature scaling and creating sequences
def prepare_data(df, n_steps):
    """Scale features/target and cut them into fixed-length training windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the 15 feature columns listed below and a ``target``
        column (built in ``download_and_structure`` as the next row's close).
    n_steps : int
        Number of consecutive rows in each input window.

    Returns
    -------
    x : numpy.ndarray
        Stacked windows of scaled features, one per valid start row.
    y : numpy.ndarray
        Scaled target belonging to the last row of each window.
    scaler_features, scaler_target : MinMaxScaler
        The fitted scalers, needed to transform new inputs and to invert
        predictions.
    """
    feature_columns = [
        'Open', 'High', 'Low', 'Close', 'Volume', 'MA30', 'MA7', 'RSI', 'EMA50', 'MACD',
        'Bollinger_High', 'Bollinger_Low', 'Stochastic_Oscillator', 'Williams_R', 'OBV',
    ]
    features = df[feature_columns].values
    targets = df['target'].values

    # Scale the features and target
    # NOTE(review): both scalers are fitted on every row of df, including rows a
    # caller may later hold out for evaluation, so held-out min/max values leak
    # into the scaling. Fixing that needs the split to happen before scaling,
    # which would change this function's interface.
    scaler_features = MinMaxScaler(feature_range=(0, 1))
    features_scaled = scaler_features.fit_transform(features)

    scaler_target = MinMaxScaler(feature_range=(0, 1))
    targets_scaled = scaler_target.fit_transform(targets.reshape(-1, 1))

    # Create sequences for LSTM model.
    # A window covers rows [start, start + n_steps). Its label is the target of
    # the LAST row inside the window: 'target' is already shifted by one row, so
    # taking the row after the window would train a two-step-ahead forecast.
    # The "+ 1" keeps the final window, whose last row is the last row of df.
    x, y = [], []
    for start in range(len(features_scaled) - n_steps + 1):
        stop = start + n_steps
        x.append(features_scaled[start:stop])
        y.append(targets_scaled[stop - 1])

    return np.array(x), np.array(y), scaler_features, scaler_target

# Create an LSTM model with more complexity (more layers and units)
def create_lstm_model(input_shape):
    """Build and compile the stacked bidirectional-LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape passed to ``Input(shape=...)``; the caller in this notebook
        supplies ``(time steps, features)``.

    Returns
    -------
    Sequential
        Three Bidirectional LSTM layers (128, 64, 32 units), each followed by
        Dropout(0.3), then a single linear Dense unit. Compiled with RMSprop
        (learning rate 0.0001), mean-squared-error loss and an MAE metric.
    """
    dropout_rate = 0.3

    # (units, return_sequences) per recurrent layer; only the last one
    # collapses the sequence to a single vector.
    recurrent_stack = ((128, True), (64, True), (32, False))

    network = Sequential()
    network.add(Input(shape=input_shape))

    for n_units, keep_sequence in recurrent_stack:
        network.add(Bidirectional(LSTM(units=n_units, return_sequences=keep_sequence)))
        network.add(Dropout(dropout_rate))

    # Single linear output for the regression target.
    network.add(Dense(units=1, activation='linear'))

    network.compile(
        optimizer=RMSprop(learning_rate=0.0001),  # Reduced learning rate
        loss='mean_squared_error',
        metrics=['mae'],
    )
    return network


In [14]:
# Train and evaluate the model using TimeSeriesSplit and a longer sequence length (n_steps)
def train_and_evaluate(ticker, n_steps=120, test_size=0.2):
    """Download data for ``ticker`` and train/evaluate one model per time-series fold.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``download_and_structure``.
    n_steps : int, optional
        Window length passed to ``prepare_data``; it also fixes the model's
        input length.
    test_size : float, optional
        Currently unused: fold sizes come from ``TimeSeriesSplit``. Kept so
        existing callers that pass it keep working.

    Returns
    -------
    tuple
        ``(model, scaler_features, scaler_target, df)`` where ``model`` is the
        model trained on the LAST fold only (earlier folds' models are
        discarded), the scalers are those fitted in ``prepare_data`` and
        ``df`` is the prepared frame.

    Notes
    -----
    RMSE and MAE in original price units are printed for every fold.
    """
    # Download and preprocess the data
    df = download_and_structure(ticker)

    # Prepare the data for the LSTM model
    x, y, scaler_features, scaler_target = prepare_data(df, n_steps)

    # TimeSeriesSplit for rolling cross-validation
    n_splits = 5
    tscv = TimeSeriesSplit(n_splits=n_splits)

    model = None
    for fold, (train_index, test_index) in enumerate(tscv.split(x), start=1):
        # A fresh model is built every fold; clear Keras' global state first so
        # the previous folds' graphs do not pile up in memory.
        tf.keras.backend.clear_session()

        # x is already [samples, time steps, features], so no reshape is needed.
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Create and compile the LSTM model with increased complexity
        model = create_lstm_model((x_train.shape[1], x_train.shape[2]))

        # Early stopping and learning rate reduction
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

        # Train the model with validation split and callbacks
        model.fit(
            x_train,
            y_train,
            epochs=50,
            batch_size=64,
            validation_split=0.2,
            callbacks=[early_stop, lr_scheduler],
        )

        # Make predictions on the test data
        y_pred = model.predict(x_test)

        # Inverse transform the predictions and actual values
        y_pred_rescaled = scaler_target.inverse_transform(y_pred)
        y_test_rescaled = scaler_target.inverse_transform(y_test)

        # Evaluate the model performance with RMSE and MAE
        errors = y_pred_rescaled - y_test_rescaled
        rmse = np.sqrt(np.mean(errors ** 2))
        mae = np.mean(np.abs(errors))

        # Label the fold so the five metric pairs can be told apart.
        print(f"Fold {fold}/{n_splits}")
        print(f"RMSE: {rmse:.2f}")
        print(f"MAE: {mae:.2f}")

    return model, scaler_features, scaler_target, df


In [8]:
# Sanity check: fetch AAPL through download_and_structure and preview the first rows.
# NOTE(review): this cell's execution count is lower than that of the cell defining
# download_and_structure - re-run from a fresh kernel to confirm the output is current.
df = download_and_structure('AAPL')
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,target,MA30,MA7,RSI,EMA50,MACD,Bollinger_High,Bollinger_Low,Stochastic_Oscillator,Williams_R,OBV
33,2018-02-20,43.012501,43.564999,42.855,42.962502,40.637375,135722000,42.767502,42.434833,41.717857,56.099084,42.391802,0.425262,44.69665,38.474099,87.917002,-12.082998,-226217600
34,2018-02-21,43.2075,43.529999,42.752499,42.767502,40.452927,149886400,43.125,42.4075,42.241429,54.722385,42.406535,0.414815,44.427344,38.594156,84.743695,-15.256305,-376104000
35,2018-02-22,42.950001,43.487499,42.927502,43.125,40.791092,123967600,43.875,42.39225,42.591072,56.814776,42.43471,0.414142,44.351075,38.627425,90.561403,-9.438597,-252136400
36,2018-02-23,43.4175,43.912498,43.384998,43.875,41.500496,135249600,44.7425,42.402333,42.989643,60.89738,42.491192,0.443735,44.54153,38.54672,99.409705,-0.590295,-116886800
37,2018-02-26,44.087502,44.8475,44.052502,44.7425,42.321056,152648800,44.5975,42.433083,43.403929,65.016959,42.579479,0.497234,44.899276,38.375474,98.559183,-1.440817,35762000


In [15]:
# Function to predict stock price for specific dates
def predict_for_dates(model, scaler_features, scaler_target, df, dates, n_steps=120):
    """Print a predicted closing price for each requested date.

    Parameters
    ----------
    model : callable
        Trained model; called on an array shaped ``(1, n_steps, n_features)``.
    scaler_features, scaler_target : fitted scalers
        ``scaler_features.transform`` is applied to the input window and
        ``scaler_target.inverse_transform`` to the model output.
    df : pandas.DataFrame
        Frame with a ``Date`` column and the 15 feature columns listed below.
        It is not modified.
    dates : iterable of str
        Dates to predict, in any format ``pd.to_datetime`` accepts.
    n_steps : int, optional
        Number of rows fed to the model. NOTE(review): should equal the
        ``n_steps`` the model was trained with - the default here is 120.

    Returns
    -------
    None
        Results and skip reasons are printed.
    """
    feature_columns = [
        'Open', 'High', 'Low', 'Close', 'Volume', 'MA30', 'MA7', 'RSI', 'EMA50', 'MACD',
        'Bollinger_High', 'Bollinger_Low', 'Stochastic_Oscillator', 'Williams_R', 'OBV',
    ]

    # Parse dates into a local Series rather than writing back to df, so the
    # caller's frame is left untouched.
    date_col = pd.to_datetime(df['Date'])

    @tf.function(reduce_retracing=True)
    def model_predict(features_scaled):
        # training=False keeps the Dropout layers inactive at inference time.
        return model(features_scaled, training=False)

    for date_str in dates:
        date = pd.to_datetime(date_str)

        if not (date_col == date).any():
            print(f"Date {date_str} is not in the available data range.")
            continue

        # Use only rows strictly BEFORE the requested date: including the
        # date's own row would feed the model the very close it is asked for.
        data_until_date = df.loc[date_col < date].tail(n_steps)

        if len(data_until_date) < n_steps:
            print(f"Not enough historical data available up to {date_str} to make a prediction.")
            continue

        features = data_until_date[feature_columns].values
        features_scaled = scaler_features.transform(features)
        features_scaled = np.reshape(features_scaled, (1, features_scaled.shape[0], features_scaled.shape[1]))

        predicted_price_scaled = model_predict(features_scaled)
        # Convert the returned tensor to a NumPy array before handing it to the scaler.
        predicted_price = scaler_target.inverse_transform(np.asarray(predicted_price_scaled))

        print(f"Predicted closing price for {date_str}: {predicted_price[0][0]:.2f}")

In [17]:
# The model's input length is fixed when it is built from the training windows,
# so training and prediction must use the same sequence length. Previously the
# prediction call fell back to its default of 120 while training used 80.
N_STEPS = 80
model, scaler_features, scaler_target, df = train_and_evaluate('AAPL', n_steps=N_STEPS)
predict_for_dates(model, scaler_features, scaler_target, df, ['2020-12-31', '2021-12-01'], n_steps=N_STEPS)

[*********************100%***********************]  1 of 1 completed

Epoch 1/50





[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 483ms/step - loss: 0.0117 - mae: 0.0873 - val_loss: 3.4726e-04 - val_mae: 0.0139 - learning_rate: 1.0000e-04
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - loss: 0.0049 - mae: 0.0560 - val_loss: 4.0012e-04 - val_mae: 0.0155 - learning_rate: 1.0000e-04
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - loss: 0.0044 - mae: 0.0522 - val_loss: 4.1104e-04 - val_mae: 0.0160 - learning_rate: 1.0000e-04
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - loss: 0.0043 - mae: 0.0535 - val_loss: 2.6084e-04 - val_mae: 0.0125 - learning_rate: 1.0000e-04
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - loss: 0.0042 - mae: 0.0516 - val_loss: 2.8967e-04 - val_mae: 0.0133 - learning_rate: 5.0000e-05
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step - loss: 0.0029 - 

In [9]:
# Show the stored row for 2020-12-31, one of the dates passed to predict_for_dates
# (presumably to compare the actual values with the printed prediction).
df[df['Date'] == '2020-12-31']

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,target,MA30,MA7,RSI,EMA50,MACD,Bollinger_High,Bollinger_Low,Stochastic_Oscillator,Williams_R,OBV
755,2020-12-31,134.080002,134.740005,131.720001,132.690002,129.894318,99116600,129.410004,124.503,133.254288,62.738934,123.08475,0.458586,137.100152,118.358848,66.55705,-33.44295,3263368200


In [11]:
# Show the stored row for 2021-12-01, the other date passed to predict_for_dates
# (presumably to compare the actual values with the printed prediction).
df[df['Date'] == '2021-12-01']

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,target,MA30,MA7,RSI,EMA50,MACD,Bollinger_High,Bollinger_Low,Stochastic_Oscillator,Williams_R,OBV
986,2021-12-01,167.479996,170.300003,164.529999,164.770004,162.292801,152052500,163.759995,153.358335,161.641432,69.869847,151.958465,0.892873,166.621543,143.89446,75.766884,-24.233116,3827696100
