In [2]:
# Install required packages
!pip install yfinance numpy pandas matplotlib scikit-learn tensorflow



In [4]:

import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [5]:
# Download historical stock data
ticker = 'AAPL'  # Change to any ticker symbol you want
df = yf.download(ticker, start='2010-01-01', end='2024-01-01')
print("Sample data:")
print(df.head())

  df = yf.download(ticker, start='2010-01-01', end='2024-01-01')
[*********************100%***********************]  1 of 1 completed

Sample data:
Price          Close      High       Low      Open     Volume
Ticker          AAPL      AAPL      AAPL      AAPL       AAPL
Date                                                         
2010-01-04  6.431898  6.446624  6.382909  6.414466  493729600
2010-01-05  6.443017  6.479383  6.409056  6.449630  601904800
2010-01-06  6.340533  6.468564  6.333921  6.443018  552160000
2010-01-07  6.328810  6.371487  6.282827  6.363974  477131200
2010-01-08  6.370887  6.371489  6.283130  6.320396  447610800





In [6]:
# Manual Feature Engineering without TA-Lib

# Moving Averages
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA50'] = df['Close'].rolling(window=50).mean()

# Exponential Moving Averages
df['EMA10'] = df['Close'].ewm(span=10, adjust=False).mean()
df['EMA50'] = df['Close'].ewm(span=50, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
df['RSI'] = 100 - (100 / (1 + rs))

# MACD Calculation
ema12 = df['Close'].ewm(span=12, adjust=False).mean()
ema26 = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = ema12 - ema26
df['MACDSignal'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Fill NaN values from rolling calculations
df.fillna(method='bfill', inplace=True)

  df.fillna(method='bfill', inplace=True)


In [7]:
# Select features for the model
features = ['Close', 'MA10', 'MA50', 'EMA10', 'EMA50', 'RSI', 'MACD', 'MACDSignal', 'Volume']
data = df[features]

In [8]:
# Scale features
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

In [9]:

# Creating sequences for LSTM input
def create_sequences(data, seq_length=60):
    X = []
    y = []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i])
        y.append(data[i, 0])  # Predict 'Close' price
    return np.array(X), np.array(y)

SEQ_LENGTH = 60
X, y = create_sequences(scaled_data, SEQ_LENGTH)

In [10]:
# Split into train and test sets (80% train)
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")

Training samples: 2769, Testing samples: 693


In [11]:
# Build LSTM model with Dropout
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(SEQ_LENGTH, len(features))),
    Dropout(0.2),
    LSTM(100, return_sequences=True),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

  super().__init__(**kwargs)


In [None]:
# Train model with EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 58ms/step - loss: 0.0074 - val_loss: 0.0149
Epoch 2/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - loss: 7.5979e-04 - val_loss: 0.0051
Epoch 3/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 6.3051e-04 - val_loss: 0.0035
Epoch 4/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 7.3390e-04 - val_loss: 0.0034
Epoch 5/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 58ms/step - loss: 6.0804e-04 - val_loss: 0.0033
Epoch 6/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 59ms/step - loss: 5.3302e-04 - val_loss: 0.0136
Epoch 7/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 58ms/step - loss: 5.0935e-04 - val_loss: 0.0051
Epoch 8/50
[1m 2/44[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 66ms/step - loss: 3.2932e-04

In [None]:

# Plot training and validation loss
plt.figure(figsize=(10,5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss During Training')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.show()

In [None]:

# Make predictions on test data
predictions = model.predict(X_test)

In [None]:
# Inverse scale predictions and true values (only for 'Close' price)
close_min = scaler.data_min_[0]
close_max = scaler.data_max_[0]

predictions_rescaled = predictions * (close_max - close_min) + close_min
y_test_rescaled = y_test * (close_max - close_min) + close_min

In [None]:
# Ploting actual vs predicted prices
plt.figure(figsize=(12,6))
plt.plot(y_test_rescaled, label='Actual Close Price')
plt.plot(predictions_rescaled, label='Predicted Close Price')
plt.title(f'{ticker} Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price USD')
plt.legend()
plt.show()


In [None]:

#  Evaluating model performance and calculate accuracy metrics
rmse = np.sqrt(mean_squared_error(y_test_rescaled, predictions_rescaled))
mae = mean_absolute_error(y_test_rescaled, predictions_rescaled)
mape = np.mean(np.abs((y_test_rescaled - predictions_rescaled) / y_test_rescaled)) * 100
r2 = r2_score(y_test_rescaled, predictions_rescaled)
accuracy = 100 - mape

print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
print(f'Mean Absolute Error (MAE): {mae:.2f}')
print(f'Mean Absolute Percentage Error (MAPE): {mape:.2f}%')
print(f'R-squared (Coefficient of Determination): {r2:.4f}')
print(f'Approximate Prediction Accuracy: {accuracy:.2f}%')
