In [None]:
import os
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import yfinance as yf
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
from tensorflow.keras.models import Sequential

warnings.filterwarnings("ignore")

## Importing data

In [None]:
# Directory that holds the processed time-series CSV files.
# Set the THESIS_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original local location is used unchanged.
DATA_DIR = Path(
    os.environ.get('THESIS_DATA_DIR', r'C:\Users\joneh\master_thesis\data\time_series')
)

# Processed CLc1 series used by the modelling cells below.
df = pd.read_csv(DATA_DIR / 'CLc1_processed.csv')

df

## Create model

In [None]:
# --- Configuration ----------------------------------------------------------
SEED = 42                                      # fixed seed so repeated runs are comparable
TARGET_COLS = ['std_one_day', 'std_two_day']   # columns the network is trained to predict
DROP_COLS = ['Adj Close', 'Log Returns']       # columns excluded from the feature matrix
TEST_SIZE = 0.2
BATCH_SIZE = 1
EPOCHS = 20

# Seed NumPy and TensorFlow before any weights are created.
# (Some GPU kernels may still be non-deterministic.)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Features / targets -----------------------------------------------------
# `df` is the frame loaded in the data-import cell. Only numeric columns can be
# fed to the scaler and the network.
# NOTE(review): assumes every numeric column other than DROP_COLS/TARGET_COLS
# is a genuine feature (e.g. no stray saved-index column) -- confirm.
data = df.select_dtypes(include='number')

y = data[TARGET_COLS].values

# The targets must not appear among the inputs: leaving them in lets the model
# read the answer straight from its features.
feature_frame = data.drop(columns=DROP_COLS + TARGET_COLS)
if feature_frame.shape[1] == 0:
    raise ValueError(
        'No feature columns remain after removing the target and dropped columns; '
        'add explanatory features before training.'
    )
X = feature_frame.values

# --- Train / test split -----------------------------------------------------
# Keep row order (shuffle=False) so the test set is the final TEST_SIZE share
# of the rows instead of a random sample interleaved with the training rows.
# NOTE(review): assumes the CSV rows are in chronological order -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, shuffle=False
)

# Scale each feature to [0, 1]. The scaler is fit on the training rows only,
# so no statistics from the test rows leak into training.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Add the trailing axis the LSTM layers expect: (samples, steps, 1).
# Each feature column is presented to the network as one step.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# --- Model ------------------------------------------------------------------
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
# One output unit per target column so predictions line up with y column-wise.
model.add(Dense(len(TARGET_COLS)))

model.compile(optimizer='adam', loss='mean_squared_error')

# --- Train and predict ------------------------------------------------------
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

predictions = model.predict(X_test)

# --- Training-loss curve ----------------------------------------------------
train_loss = history.history['loss']

fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(train_loss, label='Training loss')
ax.legend(loc='best')
ax.set_title('Mean Squared Error Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.grid(alpha=0.3)
plt.show()


In [None]:
# Names of the target columns, in the same order as the columns of y_test.
target_names = ['std_one_day', 'std_two_day']

# Work on a 2-D prediction array with one column per target. If the model
# produced a single output column, repeat it across the targets so that the
# per-target slicing and the MSE below operate on matching shapes.
y_pred = np.asarray(predictions)
if y_pred.ndim == 1:
    y_pred = y_pred.reshape(-1, 1)
if y_pred.shape[1] == 1 and y_test.shape[1] > 1:
    y_pred = np.broadcast_to(y_pred, y_test.shape)

# Show the actual test targets as a labelled table.
display(pd.DataFrame(y_test, columns=target_names))

# Select by COLUMN (one target each). Indexing with [0] / [1] would pick the
# first / second test sample instead of the first / second target.
results_one_day = pd.DataFrame({'Actual': y_test[:, 0], 'Predictions': y_pred[:, 0]})
results_two_day = pd.DataFrame({'Actual': y_test[:, 1], 'Predictions': y_pred[:, 1]})

display(results_one_day)

# Plot the first 50 test samples, one panel per target so the
# 'Actual' / 'Predictions' legend entries are unambiguous.
fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
for ax, name, results in zip(axes, target_names, (results_one_day, results_two_day)):
    results[:50].plot(ax=ax)
    ax.set_title(f'Volatility Prediction ({name})')
    ax.set_ylabel('Volatility')
    ax.legend()
axes[-1].set_xlabel('Test sample index')
fig.tight_layout()

# Mean squared error averaged over all test samples and both targets.
mse = mean_squared_error(y_test, y_pred)
print('MSE: ', mse)
