In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler

In [36]:
import sys
sys.path.append(r'C:\Users\HP\week 11 final\time_series_modeling\scripts')  

import data_loading

In [37]:
# Assuming you have the preprocessed data from data_loading.py
data = data_loading.load_stock_data("TSLA", "2015-01-01", "2024-10-31")
preprocessed_data = data_loading.preprocess_data(data)

# Extract closing prices for TSLA
tsla_close = preprocessed_data['Close']

[*********************100%***********************]  1 of 1 completed
  data = data.fillna(method='ffill')


In [38]:
# Train-test split 
train_size = int(len(data) * 0.8)
train, test = data[:train_size], data[train_size:]

In [39]:
preprocessed_data.head()

Price,Adj Close,Close,High,Low,Open,Volume
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2015-01-02 00:00:00+00:00,14.620667,14.620667,14.883333,14.217333,14.858,71466000
2015-01-05 00:00:00+00:00,14.006,14.006,14.433333,13.810667,14.303333,80527500
2015-01-06 00:00:00+00:00,14.085333,14.085333,14.28,13.614,14.004,93928500
2015-01-07 00:00:00+00:00,14.063333,14.063333,14.318667,13.985333,14.223333,44526000
2015-01-08 00:00:00+00:00,14.041333,14.041333,14.253333,14.000667,14.187333,51637500


In [40]:
# Load data (assuming you have a CSV file with 'Date' and 'Adj Close' columns)
data = preprocessed_data

In [41]:
# Prepare data for LSTM
def create_dataset(dataset, look_back=60):
  X, y = [], []
  for i in range(len(dataset)-look_back-1):
    a = dataset[i:(i+look_back), 0]
    X.append(a)
    y.append(dataset[i + look_back, 0])
  return np.array(X), np.array(y)


In [42]:
# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data['Adj Close'].values.reshape(-1, 1))

In [43]:
# Split into training and testing sets
train_size = int(len(scaled_data) * 0.8)
test_size = len(scaled_data) - train_size
train, test = scaled_data[0:train_size,:], scaled_data[train_size:len(scaled_data),:]

In [44]:
# Reshape input to be [samples, time steps, features]
look_back = 60
X_train, y_train = create_dataset(train, look_back)
X_test, y_test = create_dataset(test, look_back)

In [45]:
# Reshape input to be [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [46]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [47]:
# Define Early Stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=2)  # Monitor validation loss


In [48]:
# Train the model with Early Stopping
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

Epoch 1/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 54ms/step - loss: 0.0221 - val_loss: 0.0021
Epoch 2/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 0.0010 - val_loss: 0.0020
Epoch 3/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 0.0011 - val_loss: 0.0016
Epoch 4/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 7.6724e-04 - val_loss: 0.0016
Epoch 5/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 42ms/step - loss: 9.4037e-04 - val_loss: 0.0014
Epoch 6/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 7.9611e-04 - val_loss: 0.0015
Epoch 7/100
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 5.1792e-04 - val_loss: 0.0019


In [49]:
# Make predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step


In [50]:
# Evaluate the model
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, predictions)
mape = mean_absolute_percentage_error(y_test, predictions)

print('LSTM Model:')
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('Mean Absolute Error:', mae)
print('Mean Absolute Percentage Error:', mape)

LSTM Model:
Mean Squared Error: 52052.33836658447
Root Mean Squared Error: 228.14981561812507
Mean Absolute Error: 225.67949432091103
Mean Absolute Percentage Error: 442.48097961647824


In [51]:
# Define seasonal parameters (adjust values as needed)
p, d, q = 1, 1, 1
seasonal_period = 12  # Or a more descriptive name

# Fit the SARIMA model
model = SARIMAX(adjusted_close, order=(p, d, q), seasonal_order=(1, 1, 1, seasonal_period))
model_fit = model.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
