In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

In [10]:
import sys
sys.path.append(r'C:\Users\HP\week 11 final\time_series_modeling\scripts')  

import data_loading

In [11]:
# Assuming you have the preprocessed data from data_loading.py
data = data_loading.load_stock_data("TSLA", "2015-01-01", "2024-10-31")
preprocessed_data = data_loading.preprocess_data(data)

# Extract closing prices for TSLA
tsla_close = preprocessed_data['Close']

[*********************100%***********************]  1 of 1 completed
  data = data.fillna(method='ffill')


In [None]:
# Train-test split 
train_size = int(len(data) * 0.8)
train, test = data[:train_size], data[train_size:]

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load your data here (assuming 'Close' column contains the target time series)
# df = pd.read_csv("your_data.csv")
# data = df['Close'].squeeze()  # Ensure data is a Series

# Check for stationarity and apply differencing if needed
def check_stationarity(timeseries, significance_level=0.05):
    result = adfuller(timeseries)
    print(f"ADF Statistic: {result[0]}")
    print(f"p-value: {result[1]}")
    return result[1] < significance_level  # True if stationary

# Differencing to make the data stationary if needed
def make_stationary(data, max_diff_order=2):
    diff_order = 0
    while diff_order < max_diff_order:
        if check_stationarity(data):
            break
        data = data.diff().dropna().squeeze()  # Ensure it's a Series after differencing
        diff_order += 1
    return data, diff_order

# Split data into training and test sets
train_size = int(len(data) * 0.8)
train_data, test_data = data[:train_size].squeeze(), data[train_size:].squeeze()  # Ensure both are Series

# Check stationarity and apply differencing if needed
stationary_data, diff_order = make_stationary(train_data)

# Define ARIMA model with adjusted parameters
p, d, q = 5, diff_order, 0  # Starting values for (p, d, q)

# Fit the ARIMA model with error handling
try:
    arima_model = ARIMA(stationary_data, order=(p, d, q))
    arima_fit = arima_model.fit()
    print("Model fitted successfully!")
except Exception as e:
    print(f"Error fitting ARIMA model: {e}")

# Forecast for the test data period
try:
    forecast_steps = len(test_data)
    arima_forecast = arima_fit.forecast(steps=forecast_steps)
    
    # Reverse differencing if needed
    if diff_order > 0:
        for _ in range(diff_order):
            arima_forecast = arima_forecast.cumsum() + train_data.iloc[-1]

    # Calculate Mean Squared Error
    mse = mean_squared_error(test_data, arima_forecast)
    print(f"Mean Squared Error: {mse}")

    # Plotting the actual vs predicted values
    plt.figure(figsize=(12, 6))
    plt.plot(train_data.index, train_data, label='Training Data')
    plt.plot(test_data.index, test_data, label='Actual Data', color='blue')
    plt.plot(test_data.index, arima_forecast, label='Forecasted Data', color='red')
    plt.legend()
    plt.title("ARIMA Model - Actual vs Forecasted")
    plt.show()

except Exception as e:
    print(f"Error in forecasting: {e}")


In [6]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Fit the SARIMA model
sarima_model = SARIMAX(train, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
sarima_fit = sarima_model.fit()

# Forecast
sarima_forecast = sarima_fit.forecast(steps=len(test))

# Evaluate
mae = mean_absolute_error(test, sarima_forecast)
rmse = np.sqrt(mean_squared_error(test, sarima_forecast))
print(f"SARIMA MAE: {mae}, RMSE: {rmse}")


  self._init_dates(dates, freq)


ValueError: SARIMAX models require univariate `endog`. Got shape (1979, 6).

In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Assuming 'Close' is the target time series
# df = pd.read_csv('your_data.csv')
# target_data = df[['Close']].values  # Select only 'Close' for scaling

# Scale only the target variable
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Define parameters
time_step = 60

# Convert data into sequences for LSTM
def create_sequences(data, time_step):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])  # Use only the target feature
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

# Prepare training and testing data
train_size = int(len(scaled_data) * 0.8)
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

X_train, y_train = create_sequences(train_data, time_step)
X_test, y_test = create_sequences(test_data, time_step)

# Reshape inputs to be [samples, time steps, features]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Define LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1)  # Output layer with a single neuron for regression
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=64, verbose=1)

# Make predictions and reverse scaling
lstm_predictions = model.predict(X_test)

# Ensure inverse transform applies only to the target feature
lstm_predictions = scaler.inverse_transform(lstm_predictions)  # Inverse transform the predictions
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))  # Reshape y_test to (n_samples, 1) and inverse transform

# Evaluate and plot
mae = mean_absolute_error(y_test, lstm_predictions)
print(f"Mean Absolute Error: {mae}")

# Plot actual vs. predicted values
plt.figure(figsize=(12, 6))
plt.plot(y_test, label="Actual")
plt.plot(lstm_predictions, label="Predicted")
plt.xlabel("Time Step")
plt.ylabel("Close Price")
plt.legend()
plt.show()


Epoch 1/10


  super().__init__(**kwargs)


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.0272
Epoch 2/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 0.0018
Epoch 3/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - loss: 0.0011
Epoch 4/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - loss: 0.0011
Epoch 5/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - loss: 8.3635e-04
Epoch 6/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 49ms/step - loss: 9.7606e-04
Epoch 7/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - loss: 8.0694e-04
Epoch 8/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 69ms/step - loss: 8.9807e-04
Epoch 9/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - loss: 6.2142e-04
Epoch 10/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 70ms/step - los

ValueError: non-broadcastable output operand with shape (434,1) doesn't match the broadcast shape (434,6)

In [16]:
# Print shapes of data involved in scaling and predictions
print("Shape of target_data:", data.shape)  # Target column before scaling
print("Shape of scaled_data:", scaled_data.shape)  # Data after scaling
print("Shape of X_train:", X_train.shape)          # Training input features
print("Shape of y_train:", y_train.shape)          # Training target
print("Shape of X_test:", X_test.shape)            # Testing input features
print("Shape of y_test before reshaping:", y_test.shape)  # Testing target before reshaping

# Run LSTM prediction
lstm_predictions = model.predict(X_test)
print("Shape of lstm_predictions before inverse transform:", lstm_predictions.shape)  # Predictions shape

# Reshape y_test to ensure it has a compatible shape for inverse scaling
y_test = y_test.reshape(-1, 1)  # Reshape y_test to (n_samples, 1)
print("Shape of y_test after reshaping:", y_test.shape)

# Attempt inverse transform on predictions and y_test
try:
    lstm_predictions = scaler.inverse_transform(lstm_predictions)  # Inverse scale predictions
    y_test = scaler.inverse_transform(y_test)  # Inverse scale actual values
    print("Inverse transform successful.")
except ValueError as e:
    print("Inverse transform error:", e)


Shape of target_data: (2474, 6)
Shape of scaled_data: (2474, 6)
Shape of X_train: (1918, 60, 1)
Shape of y_train: (1918,)
Shape of X_test: (434, 60, 1)
Shape of y_test before reshaping: (434,)
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Shape of lstm_predictions before inverse transform: (434, 1)
Shape of y_test after reshaping: (434, 1)
Inverse transform error: non-broadcastable output operand with shape (434,1) doesn't match the broadcast shape (434,6)
