In [61]:
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import math
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

In [30]:
df = pd.read_csv('Nasdaq100_interpolate.csv')
df.columns = ['Date', 'nasdaq100_price', 'crude_price', 'gdp', 'rate']
df.head()

Unnamed: 0,Date,nasdaq100_price,crude_price,gdp,rate
0,2004-03-22,1381.390015,37.110001,12092.00533,1.01
1,2004-03-23,1370.040039,37.450001,12094.086297,0.99
2,2004-03-24,1381.859985,37.009998,12096.167264,0.99
3,2004-03-25,1425.859985,35.509998,12098.248231,1.02
4,2004-03-26,1415.390015,35.73,12100.329198,1.0


In [31]:
df.shape

(5033, 5)

In [32]:
df['rate'] /= 100
df['gdp'] *= 1000000000

df.dropna(inplace = True)
df.head()

Unnamed: 0,Date,nasdaq100_price,crude_price,gdp,rate
0,2004-03-22,1381.390015,37.110001,12092010000000.0,0.0101
1,2004-03-23,1370.040039,37.450001,12094090000000.0,0.0099
2,2004-03-24,1381.859985,37.009998,12096170000000.0,0.0099
3,2004-03-25,1425.859985,35.509998,12098250000000.0,0.0102
4,2004-03-26,1415.390015,35.73,12100330000000.0,0.01


In [33]:
correlation_matrix = df.iloc[:,1:5].corr()

fig = go.Figure(data=go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.index,
    y=correlation_matrix.columns,
    colorscale='Viridis',  
))


fig.update_layout(
    title="Correlation Matrix",
    xaxis_title="Features",
    yaxis_title="Features"
)


fig.show()


In [34]:
df.isnull().sum()

Date               0
nasdaq100_price    0
crude_price        0
gdp                0
rate               0
dtype: int64

In [35]:
df['SMA_50'] = df['nasdaq100_price'].rolling(50).mean()
df['SMA_100'] = df['nasdaq100_price'].rolling(100).mean()

In [36]:
df.shape

(4916, 7)

In [37]:
fig = px.line(df, x='Date', y=['nasdaq100_price', 'SMA_50', 'SMA_100'],
              labels={'value': 'Price', 'variable': 'Metric', 'Date': 'Date'},
              title='Nasdaq Price Chart')

fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

fig.show()

In [38]:
fig = px.line(df, x='Date', y='gdp',
              labels={'value': 'US GDP', 'variable': 'Metric', 'Date': 'Date'},
              title='US GDP Chart')

fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

fig.show()

In [39]:
fig = px.line(df, x='Date', y='rate',
              labels={'value': 'Fed Rate', 'variable': 'Metric', 'Date': 'Date'},
              title='Fed Rates Chart')

fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

fig.show()

In [64]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[['nasdaq100_price']])

train_size = int(len(scaled_data) * 0.8)
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

In [65]:
def create_dataset(data, time_step):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

time_step = 60
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (3871, 60, 1)
Shape of X_test: (923, 60, 1)
Shape of y_train: (3871,)
Shape of y_test: (923,)


In [55]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')


history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=64, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [63]:
# model_path = "uni_lstm_model.h5"
# model.save(model_path)

In [56]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(1, len(history.history['loss'])+1), y=history.history['loss'], mode='lines', name='Training Loss'))
fig.add_trace(go.Scatter(x=np.arange(1, len(history.history['val_loss'])+1), y=history.history['val_loss'], mode='lines', name='Validation Loss'))
fig.update_layout(title='Loss vs. Validation Loss', xaxis_title='Epoch', yaxis_title='Loss', template='plotly_dark')
fig.show()

In [58]:

y_pred = model.predict(X_test)


predicted_test = scaler.inverse_transform(y_pred)


fig = go.Figure()
fig.add_trace(go.Scatter(x=df['Date'][:train_size], y=df['nasdaq100_price'][:train_size], mode='lines', name='Train Data'))
fig.add_trace(go.Scatter(x=df['Date'][train_size+time_step:], y=df['nasdaq100_price'][train_size+time_step:], mode='lines', name='Test Data'))
fig.add_trace(go.Scatter(x=df['Date'][train_size+time_step:], y=predicted_test.flatten(), mode='lines', name='Predicted Test Data'))
fig.update_layout(title='Nasdaq100 Price Prediction', xaxis_title='Date', yaxis_title='Price', template='plotly_dark')
fig.show()


In [62]:
y_pred_inv = scaler.inverse_transform(y_pred)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))


mse = mean_squared_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_inv, y_pred_inv)
r2 = r2_score(y_test_inv, y_pred_inv)

print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)

Mean Squared Error (MSE): 79828.70129501297
Root Mean Squared Error (RMSE): 282.5397340110112
Mean Absolute Error (MAE): 229.5575664020175
R-squared (R2): 0.9813863800128089
