# LSTM Model Training

### Import Libraries, Download Data, Stock Data

In [1]:
import sys
!"{sys.executable}" -m pip uninstall -y numpy
!"{sys.executable}" -m pip install numpy
!{sys.executable} -m pip install pandas yfinance scikit-learn tensorflow matplotlib

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Defaulting to user installation because normal site-packages is not writeable
Collecting numpy
  Using cached numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl (5.3 MB)
Installing collected packages: numpy
Successfully installed numpy-2.0.2
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
Collecting pandas
  Downloading pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl (11.3 MB)
[K     |████████████████████████████████| 11.3 MB 1.2 MB/s eta 0:00:01
[?25hCollecting yfinance
  Downloading yfinance-0.2.61-py2.py3-none-any.whl (117 kB)
[K     |████████████████████████████████| 117 kB 1.5 MB/s eta 0:00:01
[?25hCollecting scikit-learn
  Downloading scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl (11.1 MB)
[K     |█████████████████

In [2]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

Matplotlib is building the font cache; this may take a moment.


In [3]:
ticker = "AAPL"
data = yf.download(ticker, start="2010-01-01", end="2023-11-13")
df = data[['Close']].reset_index()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


### Preprocess Data and Create Sequences

In [4]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:(i + seq_length), 0])
        y.append(data[i + seq_length, 0])
    return np.array(X), np.array(y)

seq_length = 60
X, y = create_sequences(scaled_data, seq_length)

### Split and Reshape Data

In [5]:
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

### Build and Train Model

In [6]:
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1)
])
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.1, verbose=1)

Epoch 1/100


  super().__init__(**kwargs)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 0.0017 - val_loss: 0.0012
Epoch 2/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 3.2589e-05 - val_loss: 7.7399e-04
Epoch 3/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 3.2475e-05 - val_loss: 8.7390e-04
Epoch 4/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 3.0844e-05 - val_loss: 6.6293e-04
Epoch 5/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 2.9256e-05 - val_loss: 0.0015
Epoch 6/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 3.3971e-05 - val_loss: 6.8659e-04
Epoch 7/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 2.5383e-05 - val_loss: 6.1588e-04
Epoch 8/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 2.0619e-05 - val_loss: 0.0013
Epoc

<keras.src.callbacks.history.History at 0x156416610>

### Make Predictions and RMSE

In [7]:
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))
rmse = np.sqrt(np.mean((predictions - y_test)**2))
print(f"LSTM RMSE: {rmse}")

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
LSTM RMSE: 3.6201784970557007
