# Importing Important Libraries

In [1]:
from datetime import datetime, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import yfinance as yf
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

# Function to compute technical indicators


In [2]:
def compute_technical_indicators(df):
    """Return a copy of ``df`` extended with price-based technical indicators.

    The input frame is left untouched; all indicator columns are added to a
    copy, so re-running a cell that calls this function never changes the
    raw data it was given.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a numeric ``'Close'`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with these extra columns, all derived from ``'Close'``:
        ``SMA_10``, ``SMA_30`` (rolling means), ``EMA_12``, ``EMA_26``
        (exponential means), ``RSI`` (14-row rolling-mean gain/loss ratio),
        ``MACD``, ``MACD_Signal``, ``BB_Middle``, ``BB_Std``, ``BB_Upper``
        and ``BB_Lower`` (20-row mean +/- 2 standard deviations).
        Rolling columns are NaN until their window is full.
    """
    # Copy first: assigning columns below would otherwise mutate the caller's frame.
    df = df.copy()
    close = df['Close']

    # Simple and exponential moving averages.
    df['SMA_10'] = close.rolling(window=10).mean()
    df['SMA_30'] = close.rolling(window=30).mean()
    df['EMA_12'] = close.ewm(span=12, adjust=False).mean()
    df['EMA_26'] = close.ewm(span=26, adjust=False).mean()

    # RSI from 14-row rolling means of upward and downward moves.
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    # A window with no losses divides by zero, which yields RSI == 100;
    # a window with no movement at all yields NaN.
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # MACD line and its 9-span exponential signal line.
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

    # Bollinger bands: 20-row mean +/- two rolling standard deviations.
    df['BB_Middle'] = close.rolling(window=20).mean()
    df['BB_Std'] = close.rolling(window=20).std()
    df['BB_Upper'] = df['BB_Middle'] + 2 * df['BB_Std']
    df['BB_Lower'] = df['BB_Middle'] - 2 * df['BB_Std']
    return df

# Function to prepare data for LSTM


In [3]:
def prepare_lstm_data(X, y, time_steps=60):
    """Slice ``X`` into overlapping windows paired with the next ``y`` value.

    Window ``k`` is ``X[k:k + time_steps]`` and its target is
    ``y[k + time_steps]``, i.e. the value immediately after the window.

    Parameters
    ----------
    X : sequence supporting ``len`` and slicing
    y : sequence supporting integer indexing
    time_steps : int, default 60
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)`` with ``len(X) - time_steps`` entries each;
        both are empty when ``X`` has no more than ``time_steps`` rows.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Function to predict future values


In [4]:
def predict_future(model, last_sequence, scaler_y, future_days, time_steps, n_components):
    """Roll the model forward ``future_days`` steps from ``last_sequence``.

    Each step feeds the current window to ``model.predict``, stores the
    inverse-scaled prediction, then slides the window forward by one row.

    Parameters
    ----------
    model : object with ``predict(batch, verbose=0)``
    last_sequence : numpy.ndarray
        Starting window; reshaped to ``(1, time_steps, -1)`` before each
        prediction. It is copied, so the caller's array is not modified.
    scaler_y : object with ``inverse_transform``
        Maps scaled predictions back to the original target scale.
    future_days : int
        Number of steps to forecast.
    time_steps : int
        Window length used in the reshape.
    n_components : int
        How many times the scaled prediction is repeated to form the new row.

    Returns
    -------
    numpy.ndarray
        One forecast value per step, in the original target scale.

    Notes
    -----
    The row appended to the window is the scaled *target* prediction repeated
    across all feature columns, not real feature values. This is an
    approximation, so later steps are driven by synthetic inputs.
    """
    window = last_sequence.copy()  # (time_steps, n_components)
    forecasts = []

    for _step in range(future_days):
        # The model expects a batch axis in front: (1, time_steps, n_components).
        batch = window.reshape(1, time_steps, -1)
        scaled_pred = model.predict(batch, verbose=0)

        # Keep the prediction in original units for the caller.
        forecasts.append(scaler_y.inverse_transform(scaled_pred)[0, 0])

        # Build a stand-in feature row from the scaled prediction, shape (1, n_components),
        # then drop the oldest row and append the stand-in at the end.
        stand_in_row = np.repeat(scaled_pred, n_components, axis=1)
        window = np.vstack((window[1:], stand_in_row))

    return np.array(forecasts)

# Step 1: Fetch stock data


In [5]:
# Download roughly five years of daily price history for a single ticker.
ticker = 'TSLA'
end_date = datetime.now()
start_date = end_date - timedelta(days=5*365)  # 5 years of data
# Pass auto_adjust explicitly: yfinance changed its default to True (see the
# notice printed below), so pinning it keeps the columns and prices the same
# regardless of the installed yfinance version.
stock_data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# Fail here with a clear message rather than later inside scaling/PCA if the
# download produced no rows (for example a bad ticker or no connectivity).
if stock_data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} between "
        f"{start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}"
    )

# Ensure the DataFrame has a flat column structure: keep only the first
# column level when a MultiIndex is returned.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)
print("Columns after fetching data:", stock_data.columns)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

Columns after fetching data: Index(['Close', 'High', 'Low', 'Open', 'Volume'], dtype='object', name='Price')





# Step 2: Compute technical indicators


In [6]:
# Add the moving-average, RSI, MACD and Bollinger columns to the price frame.
stock_data = stock_data.pipe(compute_technical_indicators)

# Step 3: Select features and handle missing values


In [7]:
# Columns kept for modelling, grouped by kind (order is unchanged).
features = [
    'Open', 'High', 'Low', 'Close', 'Volume',    # raw prices and volume
    'SMA_10', 'SMA_30', 'EMA_12', 'EMA_26',      # moving averages
    'RSI', 'MACD', 'MACD_Signal',                # momentum indicators
    'BB_Middle', 'BB_Upper', 'BB_Lower',         # Bollinger bands
]
# Rolling indicators are NaN until their window fills; drop those rows.
stock_data = stock_data.loc[:, features].dropna()

# Step 4: Prepare features and target


In [8]:
# Target: the closing price as a NumPy array.
y = stock_data['Close'].to_numpy()
# Features: every remaining column.
X = stock_data.loc[:, stock_data.columns != 'Close']

# Step 5: Standardize the features and target


In [9]:
# NOTE(review): both scalers are fitted on every row, including rows that later
# end up in the test split, so test-set statistics leak into preprocessing.
# Fitting on the training rows only would require moving the split ahead of
# this step.

# Standardise the feature matrix (zero mean, unit variance per column).
scaler_X = StandardScaler().fit(X)
X_scaled = scaler_X.transform(X)

# Standardise the target separately; the scaler needs a 2-D column, and the
# result is flattened back to 1-D. scaler_y is reused later to undo the scaling.
scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y[:, np.newaxis]).ravel()

# Step 6: Apply PCA


In [10]:
# NOTE(review): PCA is fitted on all rows, before the train/test split, so the
# projection is informed by test-period data.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Cumulative share of variance explained by the first k components.
explained_variance_ratio = pca.explained_variance_ratio_.cumsum()
# Smallest k whose cumulative share reaches 85% (argmax finds the first True).
n_components = (explained_variance_ratio >= 0.85).argmax() + 1
# Keep only those leading components.
X_pca = X_pca[:, :n_components]

# Step 7: Prepare data for LSTM


In [11]:
# Each sample is a window of `time_steps` consecutive rows; its label is the
# scaled close that immediately follows the window.
time_steps = 60
X_lstm, y_lstm = prepare_lstm_data(X=X_pca, y=y_scaled, time_steps=time_steps)

# Step 8: Split data into train and test sets


In [12]:
# Chronological split: the first 80% of windows train the model and the
# remaining windows are held out for testing (no shuffling).
train_size = int(len(X_lstm) * 0.8)
X_train, X_test = np.split(X_lstm, [train_size])
y_train, y_test = np.split(y_lstm, [train_size])


# Step 9: Build LSTM model


In [13]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers with dropout, followed by a small dense head that
# outputs one value (the scaled close for the step after the window).
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first LSTM is what raised the Keras warning
# previously shown under this cell.
model = Sequential([
    tf.keras.Input(shape=(time_steps, int(n_components))),
    LSTM(50, return_sequences=True),  # emits the full sequence for the next LSTM
    Dropout(0.2),
    LSTM(50),                         # emits only the final hidden state
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

  super().__init__(**kwargs)


# Step 10: Train the model with more epochs


In [14]:
# Fit on the training windows; 10% of them are held back by Keras for
# per-epoch validation loss. `history` keeps the per-epoch loss curves.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 0.2683 - val_loss: 0.0301
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0793 - val_loss: 0.0350
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.0719 - val_loss: 0.0288
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0635 - val_loss: 0.0252
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0654 - val_loss: 0.0290
Epoch 6/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0751 - val_loss: 0.0230
Epoch 7/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0511 - val_loss: 0.0205
Epoch 8/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0582 - val_loss: 0.0296
Epoch 9/100
[1m27/27[0m [32m━━━━━━━━━

# Step 11: Make predictions on test data


In [15]:
# True test targets, converted from standardised units back to prices (1-D).
y_test_actual = scaler_y.inverse_transform(y_test[:, np.newaxis]).ravel()

# Model output for the held-out windows, still in standardised units...
y_pred_scaled = model.predict(X_test)
# ...and converted back to prices; shape stays (n_samples, 1).
y_pred = scaler_y.inverse_transform(y_pred_scaled)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


# Step 12: Evaluate the model


In [16]:
# Both arrays were inverse-transformed above, so this error is in squared
# price units rather than standardised units.
mse = mean_squared_error(y_true=y_test_actual, y_pred=y_pred)
print(f'Mean Squared Error on Test Data: {mse:.2f}')

Mean Squared Error on Test Data: 533.32


# Step 13: Predict future values (e.g., 30 days into the future)


In [17]:
future_days = 30
# Start the forecast from the most recent `time_steps` rows of the PCA features.
# X_lstm[-1] is NOT that window: prepare_lstm_data stops one row early because
# every window needs a known target, so X_lstm[-1] ends at the second-to-last
# row and its prediction corresponds to the last day already in the data.
last_sequence = X_pca[-time_steps:]
future_predictions = predict_future(model, last_sequence, scaler_y, future_days, time_steps, n_components)

# Step 14: Prepare data for plotting


In [18]:
# Number of test points; the forecast is drawn to the right of them.
test_length = y_test_actual.shape[0]
# X positions of the forecast steps, continuing on from the test range.
future_x = np.arange(future_days) + test_length
# Collapse the (n_samples, 1) test predictions to 1-D.
y_pred_flat = y_pred.reshape(-1).copy()
# The forecast is already 1-D; flattened again only for symmetry.
future_predictions = future_predictions.reshape(-1).copy()
# Test predictions followed by the forecast, as one continuous series.
combined_predictions = np.hstack((y_pred_flat, future_predictions))

# Step 15: Plot results with future predictions


In [None]:
# Actual test prices against the model's test predictions, with the
# multi-day forecast appended to the right of the vertical marker.
forecast_plot_path = Path('results/tsla_stock_price_prediction_with_future.png')
# savefig does not create missing folders, so make sure the target exists.
forecast_plot_path.parent.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(12, 6))
plt.plot(np.arange(test_length), y_test_actual, label='Actual Price', color='blue')
plt.plot(np.arange(test_length + future_days), combined_predictions, label='Predicted Price', color='orange', linestyle='--')
plt.axvline(x=test_length, color='gray', linestyle='--', label='Start of Future Prediction')
plt.title(f'{ticker} Stock Price Prediction with Future Forecast')
plt.xlabel('Time (Days)')
plt.ylabel('Price')
plt.legend()
plt.savefig(forecast_plot_path)  # Updated path
# Render inline, then release the figure. The close call used to be fused
# into the comment on the savefig line and therefore never ran.
plt.show()
plt.close()

# Step 16: Plot explained variance ratio


In [None]:
# Cumulative explained variance against the number of PCA components kept,
# with the 85% selection threshold drawn as a horizontal line.
variance_plot_path = Path('results/tsla_pca_variance.png')
# savefig does not create missing folders, so make sure the target exists.
variance_plot_path.parent.mkdir(parents=True, exist_ok=True)

# Entry k of the cumulative array covers k + 1 components, so plot against a
# 1-based count to match the "Number of Components" axis label.
component_counts = np.arange(1, len(explained_variance_ratio) + 1)

plt.figure(figsize=(8, 5))
plt.plot(component_counts, explained_variance_ratio, marker='o')
plt.axhline(y=0.85, color='r', linestyle='--')
plt.title('PCA Explained Variance Ratio')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.savefig(variance_plot_path)  # Updated path
# Render inline, then release the figure. The close call used to be fused
# into the comment on the savefig line and therefore never ran.
plt.show()
plt.close()