In [1]:
import numpy as np
import torch
import torch.nn as nns
import jax
import jax.numpy as jnp
from sklearn.model_selection import train_test_split
import jax.numpy as jnp
from jax import jit, lax, value_and_grad
import jax.random as random
import pandas as pd
import pylab as plt
from scipy.stats import boxcox
from sklearn.preprocessing import normalize
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import OneHotEncoder

## Importing Data and Data Transformation

In [2]:
from google.colab import drive
drive.mount('/content/drive')

from scipy.stats import boxcox
import numpy as np
df = pd.read_csv('/content/drive/My Drive/tr_eikon_eod_data.csv', index_col = 0, parse_dates = True).dropna()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [71]:
aapl = pd.DataFrame(df['AAPL.O'])
log_returns = np.log(aapl['AAPL.O'] / aapl['AAPL.O'].shift())
log_returns = pd.DataFrame(log_returns.dropna())
log_returns.rename(columns={"AAPL.O": "log_return"}, inplace=True)

bins = [-np.inf, -0.03, -0.02, -0.015, -0.01, -0.007, -0.005, -0.003, -0.002, -0.001, -0.0005, 0,
        0.0005, 0.001, 0.002, 0.003, 0.005, 0.007, 0.01, 0.015, 0.02, np.inf]

labels = ['01Very Strong Negative', '02Strong Negative', '03Moderate Negative', '04Moderately Weak Negative',
          '05Weak Negative', '06Slightly Weak Negative', '07Very Slightly Weak Negative', '08Extremely Weak Negative',
          '09Super Weak Negative', '10Super Slightly Weak Negative', '11Neutral', '12Super Slightly Weak Positive',
          '13Super Weak Positive', '14Extremely Weak Positive', '15Very Slightly Weak Positive', '16Slightly Weak Positive',
          '17Weak Positive', '18Moderately Weak Positive', '19Moderate Positive', '20Strong Positive', '21Very Strong Positive']

log_returns['category'] = pd.cut(log_returns['log_return'], bins=bins, labels=labels)
encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(log_returns['category'].values.reshape(-1, 1))
X = []
y = []
window_size =20
output_size = 1
for i in range(len(encoded_categories) - window_size - output_size + 1):
    X.append(encoded_categories[i:i + window_size])
    y.append(encoded_categories[i + window_size:i + window_size + output_size])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1]*y_train.shape[2])
y_test = y_test.reshape(y_test.shape[0], y_test.shape[1]*y_test.shape[2])

print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)


model = Sequential()

model.add(LSTM(units=200, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=100, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=100))
model.add(Dropout(0.2))

model.add(Dense(50, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=128, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

prediction = model.predict(X_test)
predicted_classes = np.argmax(prediction, axis=1)
actual_classes = np.argmax(y_test, axis=1)



Shape of y_train: (1693, 21)
Shape of y_test: (424, 21)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 3.4825310707092285
Test Accuracy: 0.08018867671489716


In [98]:
sum_first_eleven = prediction[:, :12].sum(axis=1)
sum_last_eleven = prediction[:, 12:].sum(axis=1)
transformed_predictions = np.column_stack((sum_first_eleven, sum_last_eleven))
predicted_class = np.argmax(transformed_predictions, axis = 1)
tmp = pd.DataFrame()
tmp['direction_predicted'] = predicted_class
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 12, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)

print(f"Prediction Accuracy for AAPL Stock with cumulative sepreation = {accuracy * 100}%")

Prediction Accuracy for AAPL Stock with cumulative sepreation = 52.358490566037744%


In [99]:
tmp = pd.DataFrame()
tmp['predicted_class'] = predicted_classes
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 12, 1, 0)
tmp['direction_predicted'] = np.where(tmp['predicted_class'] >= 12, 1, 0)
tmp, np.sum(tmp['direction_actual']) / len(tmp)

(     predicted_class  actual_class  direction_actual  direction_predicted
 0                  9            17                 1                    0
 1                 17            16                 1                    1
 2                 17             1                 0                    1
 3                 17             4                 0                    1
 4                 19             5                 0                    1
 ..               ...           ...               ...                  ...
 419                1             3                 0                    0
 420                1            18                 1                    0
 421                1             8                 0                    0
 422                1            17                 1                    0
 423               19             7                 0                    1
 
 [424 rows x 4 columns],
 0.49764150943396224)

## For GS.N


In [None]:
gs = pd.DataFrame(df['GS.N'])
log_returns = np.log(gs['GS.N'] / gs['GS.N'].shift())
log_returns = pd.DataFrame(log_returns.dropna())
log_returns.rename(columns={"GS.N": "log_return"}, inplace=True)

bins = [-np.inf, -0.01, -0.001, 0, 0.001, 0.01, np.inf]
labels = ['1Strong Negative', '2Negative', '3Weak Negative', '4Weak Positive', '5Positive', '6Strong Positive']

log_returns['category'] = pd.cut(log_returns['log_return'], bins=bins, labels=labels)

from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(log_returns['category'].values.reshape(-1, 1))
X = []
y = []
window_size = 5
output_size = 1
for i in range(len(encoded_categories) - window_size - output_size + 1):
    X.append(encoded_categories[i:i + window_size])
    y.append(encoded_categories[i + window_size:i + window_size + output_size])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1]*y_train.shape[2])
y_test = y_test.reshape(y_test.shape[0], y_test.shape[1]*y_test.shape[2])

print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

model = Sequential()

model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))

model.add(LSTM(units=100))

model.add(Dense(50, activation='relu'))

model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

prediction = model.predict(X_test)
predicted_classes = np.argmax(prediction, axis=1)
actual_classes = np.argmax(y_test, axis=1)

tmp = pd.DataFrame()
tmp['predicted_class'] = predicted_classes
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
tmp['direction_predicted'] = np.where(tmp['predicted_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)
print(f"Prediction Accuracy for AAPL Stock with simple sepreation = {accuracy * 100}%")

sum_first_three = prediction[:, :3].sum(axis=1)
sum_last_three = prediction[:, 3:].sum(axis=1)
transformed_predictions = np.column_stack((sum_first_three, sum_last_three))
predicted_class = np.argmax(transformed_predictions, axis = 1)
tmp = pd.DataFrame()
tmp['direction_predicted'] = predicted_class
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)

print(f"Prediction Accuracy for AAPL Stock with cumulative sepreation = {accuracy * 100}%")

Shape of y_train: (1705, 6)
Shape of y_test: (427, 6)




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 1.7012344598770142
Test Accuracy: 0.2201405167579651
Prediction Accuracy for AAPL Stock with simple sepreation = 46.604215456674474%
Prediction Accuracy for AAPL Stock with cumulative sepreation = 47.07259953161593%


## MSFT.O

In [None]:
msft = pd.DataFrame(df['MSFT.O'])
log_returns = np.log(msft['MSFT.O'] / msft['MSFT.O'].shift())
log_returns = pd.DataFrame(log_returns.dropna())
log_returns.rename(columns={"MSFT.O": "log_return"}, inplace=True)

bins = [-np.inf, -0.01, -0.001, 0, 0.001, 0.01, np.inf]
labels = ['1Strong Negative', '2Negative', '3Weak Negative', '4Weak Positive', '5Positive', '6Strong Positive']

log_returns['category'] = pd.cut(log_returns['log_return'], bins=bins, labels=labels)
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(log_returns['category'].values.reshape(-1, 1))
X = []
y = []
window_size = 5
output_size = 1
for i in range(len(encoded_categories) - window_size - output_size + 1):
    X.append(encoded_categories[i:i + window_size])
    y.append(encoded_categories[i + window_size:i + window_size + output_size])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1]*y_train.shape[2])
y_test = y_test.reshape(y_test.shape[0], y_test.shape[1]*y_test.shape[2])

print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

model = Sequential()

model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))

model.add(LSTM(units=100))

model.add(Dense(50, activation='relu'))

model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

prediction = model.predict(X_test)
predicted_classes = np.argmax(prediction, axis=1)
actual_classes = np.argmax(y_test, axis=1)

tmp = pd.DataFrame()
tmp['predicted_class'] = predicted_classes
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
tmp['direction_predicted'] = np.where(tmp['predicted_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)
print(f"Prediction Accuracy for AAPL Stock with simple sepreation = {accuracy * 100}%")

sum_first_three = prediction[:, :3].sum(axis=1)
sum_last_three = prediction[:, 3:].sum(axis=1)
transformed_predictions = np.column_stack((sum_first_three, sum_last_three))
predicted_class = np.argmax(transformed_predictions, axis = 1)
tmp = pd.DataFrame()
tmp['direction_predicted'] = predicted_class
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)

print(f"Prediction Accuracy for AAPL Stock with cumulative sepreation = {accuracy * 100}%")

Shape of y_train: (1705, 6)
Shape of y_test: (427, 6)




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 1.7963602542877197
Test Accuracy: 0.2505854666233063
Prediction Accuracy for AAPL Stock with simple sepreation = 49.88290398126464%
Prediction Accuracy for AAPL Stock with cumulative sepreation = 50.81967213114754%


## INTC.O

In [None]:
intc = pd.DataFrame(df['INTC.O'])
log_returns = np.log(intc['INTC.O'] / intc['INTC.O'].shift())
log_returns = pd.DataFrame(log_returns.dropna())
log_returns.rename(columns={"INTC.O": "log_return"}, inplace=True)

bins = [-np.inf, -0.01, -0.001, 0, 0.001, 0.01, np.inf]
labels = ['1Strong Negative', '2Negative', '3Weak Negative', '4Weak Positive', '5Positive', '6Strong Positive']

log_returns['category'] = pd.cut(log_returns['log_return'], bins=bins, labels=labels)
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(log_returns['category'].values.reshape(-1, 1))
X = []
y = []
window_size = 5
output_size = 1
for i in range(len(encoded_categories) - window_size - output_size + 1):
    X.append(encoded_categories[i:i + window_size])
    y.append(encoded_categories[i + window_size:i + window_size + output_size])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1]*y_train.shape[2])
y_test = y_test.reshape(y_test.shape[0], y_test.shape[1]*y_test.shape[2])

print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

model = Sequential()

model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))

model.add(LSTM(units=100))

model.add(Dense(50, activation='relu'))

model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

prediction = model.predict(X_test)
predicted_classes = np.argmax(prediction, axis=1)
actual_classes = np.argmax(y_test, axis=1)

tmp = pd.DataFrame()
tmp['predicted_class'] = predicted_classes
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
tmp['direction_predicted'] = np.where(tmp['predicted_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)
print(f"Prediction Accuracy for AAPL Stock with simple sepreation = {accuracy * 100}%")

sum_first_three = prediction[:, :3].sum(axis=1)
sum_last_three = prediction[:, 3:].sum(axis=1)
transformed_predictions = np.column_stack((sum_first_three, sum_last_three))
predicted_class = np.argmax(transformed_predictions, axis = 1)
tmp = pd.DataFrame()
tmp['direction_predicted'] = predicted_class
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)

print(f"Prediction Accuracy for AAPL Stock with cumulative sepreation = {accuracy * 100}%")

Shape of y_train: (1705, 6)
Shape of y_test: (427, 6)




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 2.0103325843811035
Test Accuracy: 0.2786885201931
Prediction Accuracy for AAPL Stock with simple sepreation = 52.69320843091335%
Prediction Accuracy for AAPL Stock with cumulative sepreation = 48.711943793911004%


## AMZN.O

In [None]:
amzn = pd.DataFrame(df['AMZN.O'])
log_returns = np.log(amzn['AMZN.O'] / amzn['AMZN.O'].shift())
log_returns = pd.DataFrame(log_returns.dropna())
log_returns.rename(columns={"AMZN.O": "log_return"}, inplace=True)

bins = [-np.inf, -0.01, -0.001, 0, 0.001, 0.01, np.inf]
labels = ['1Strong Negative', '2Negative', '3Weak Negative', '4Weak Positive', '5Positive', '6Strong Positive']

log_returns['category'] = pd.cut(log_returns['log_return'], bins=bins, labels=labels)
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(log_returns['category'].values.reshape(-1, 1))
X = []
y = []
window_size = 5
output_size = 1
for i in range(len(encoded_categories) - window_size - output_size + 1):
    X.append(encoded_categories[i:i + window_size])
    y.append(encoded_categories[i + window_size:i + window_size + output_size])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1]*y_train.shape[2])
y_test = y_test.reshape(y_test.shape[0], y_test.shape[1]*y_test.shape[2])

print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

model = Sequential()

model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))

model.add(LSTM(units=100))

model.add(Dense(50, activation='relu'))

model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

prediction = model.predict(X_test)
predicted_classes = np.argmax(prediction, axis=1)
actual_classes = np.argmax(y_test, axis=1)

tmp = pd.DataFrame()
tmp['predicted_class'] = predicted_classes
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
tmp['direction_predicted'] = np.where(tmp['predicted_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)
print(f"Prediction Accuracy for AAPL Stock with simple sepreation = {accuracy * 100}%")

sum_first_three = prediction[:, :3].sum(axis=1)
sum_last_three = prediction[:, 3:].sum(axis=1)
transformed_predictions = np.column_stack((sum_first_three, sum_last_three))
predicted_class = np.argmax(transformed_predictions, axis = 1)
tmp = pd.DataFrame()
tmp['direction_predicted'] = predicted_class
tmp['actual_class'] = actual_classes
tmp['direction_actual'] = np.where(tmp['actual_class'] >= 3, 1, 0)
accuracy = np.sum(tmp['direction_predicted'] == tmp['direction_actual']) / len(tmp)

print(f"Prediction Accuracy for AAPL Stock with cumulative sepreation = {accuracy * 100}%")

Shape of y_train: (1705, 6)
Shape of y_test: (427, 6)




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 1.8082423210144043
Test Accuracy: 0.2505854666233063
Prediction Accuracy for AAPL Stock with simple sepreation = 53.86416861826698%
Prediction Accuracy for AAPL Stock with cumulative sepreation = 52.92740046838408%


In [6]:
aapl = pd.DataFrame(df['AAPL.O'])

In [63]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Reshape

# Assuming you have loaded your data into a DataFrame named 'aapl'
time_series_data = aapl['AAPL.O'].values

def create_dataset(time_series_data, n_steps):
    X, y = [], []
    for i in range(len(time_series_data) - n_steps):
        end_ix = i + n_steps
        if end_ix + 10 < len(time_series_data):
            X.append(time_series_data[i:end_ix])
            y.append(1 if time_series_data[end_ix + 1] > time_series_data[end_ix] else 0)
    return np.array(X), np.array(y)

n_steps = 40

X, y = create_dataset(time_series_data, n_steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = False)

# Normalize the input features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape input features for CNN input
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [64]:
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=26, activation='relu', input_shape=(40, 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())

# Reshape output of CNN to match LSTM input shape
cnn_model.add(Reshape((-1, cnn_model.output_shape[1])))

# Define the LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(50, input_shape=(1, cnn_model.output_shape[2])))  # Input shape: (batch_size, timesteps, features)

# Combine the models
combined_model = Sequential()
combined_model.add(cnn_model)
combined_model.add(lstm_model)
combined_model.add(Dense(1, activation='sigmoid'))

# Compile the model
combined_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the combined model
combined_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a077c5b2c80>