In [1]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import yfinance as yf
import pandas as pd
import numpy as np
import datetime
from datetime import date, timedelta

In [2]:
ticker = 'AAPL'
# Download historical data from Yahoo Finance for Apple (Example ticker)
data = yf.download(ticker, start='2020-01-01', end=datetime.datetime.today().strftime('%Y-%m-%d'))
# Focus on 'Close' prices for simplicity
data = data[['Close']]

# Initialize MinMaxScaler to normalize the data between 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))

# Scale the data for training
scaled_data = scaler.fit_transform(data)

# Define training data length (size) as 80% of the total data
training_data_len = int(np.ceil(len(scaled_data) * 0.8))

# Split the scaled data into the training set
train_data = scaled_data[0:int(training_data_len), :]
val_data = scaled_data[int(training_data_len):, :]

x_test = []
y_test = []

for i in range(60, len(val_data)):
    x_test.append(val_data[i-60:i, 0])  
    y_test.append(val_data[i, 0])       

x_train = []
y_train = []

for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])  
    y_train.append(train_data[i, 0])      

# Convert lists to numpy arrays for model training
x_train, y_train = np.array(x_train), np.array(y_train)

# Reshape x_train to the format (samples, time steps, features) for LSTM layers
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


In [3]:
model = Sequential()
# First LSTM layer with 50 units and return sequences
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
# Second LSTM layer
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
# Dense layer with 25 units
model.add(Dense(units=25))
# Output layer with 1 unit (the predicted price)
model.add(Dense(units=1))

# Compile the model using Adam optimizer and mean squared error as the loss function
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=2, epochs=50)




Epoch 1/50


  super().__init__(**kwargs)


[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 17ms/step - loss: 0.0143
Epoch 2/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0022
Epoch 3/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0011
Epoch 4/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0011
Epoch 5/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 9.0531e-04
Epoch 6/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 8.7484e-04
Epoch 7/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 8.8395e-04
Epoch 8/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0011
Epoch 9/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 8.9361e-04
Epoch 10/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0

<keras.src.callbacks.history.History at 0x14d91de10>

In [4]:
y_val = model.predict(x_train)
print(r2_score(y_val, y_train))


[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
0.961342263279633


In [6]:
x_test, y_test = np.array(x_test), np.array(y_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

y_pred = model.predict(x_test)
print(r2_score(y_pred, y_test))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
0.024905777927144213
