In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Function to prepare the data
def prepare_data(data, window_size):
    X, y = [], []
    for i in range(len(data) - window_size - 1):
        window = data[i:(i + window_size), 0]
        X.append(window)
        y.append(data[i + window_size, 0])
    return np.array(X), np.array(y)

# Download stock data using Yahoo Finance
stock_symbol = 'FR.PA'  # Change this to desired stock symbol
stock_data = yf.download(stock_symbol, start='2013-01-01', end='2022-01-01')
stock_data = stock_data['Open']
pct_change_data = stock_data.pct_change()
pct_change_data = pct_change_data.drop(pct_change_data.index[0])

# Preprocess data
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_data = scaler.fit_transform(pct_change_data.values.reshape(-1, 1))

# Define window size for time series data
window_size = 30

# Prepare data for LSTM model
X, y = prepare_data(scaled_data, window_size)
X = X.reshape(X.shape[0], X.shape[1], 1)

X_last_price = [sublist[-1][0] for sublist in X]

# Split data into training and testing sets
split = int(0.8 * len(X))
X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
X_last_price_test = X[split:]

print(f'length of y_train: {len(y_train)}')
print(f'length of y_test: {len(y_test)}')

[*********************100%***********************]  1 of 1 completed
length of y_train: 1817
length of y_test: 455


In [2]:
print(X_train)
print(y_train)

[[[-0.17852352]
  [-0.29106765]
  [-0.26104539]
  ...
  [-0.16788471]
  [-0.24811016]
  [-0.25627899]]

 [[-0.29106765]
  [-0.26104539]
  [-0.24632583]
  ...
  [-0.24811016]
  [-0.25627899]
  [-0.2309367 ]]

 [[-0.26104539]
  [-0.24632583]
  [-0.27406397]
  ...
  [-0.25627899]
  [-0.2309367 ]
  [-0.17797616]]

 ...

 [[-0.37614279]
  [-0.34260899]
  [-0.2684205 ]
  ...
  [-0.45551694]
  [-0.62015639]
  [-0.73152794]]

 [[-0.34260899]
  [-0.2684205 ]
  [-0.15117268]
  ...
  [-0.62015639]
  [-0.73152794]
  [ 1.        ]]

 [[-0.2684205 ]
  [-0.15117268]
  [-0.0875759 ]
  ...
  [-0.73152794]
  [ 1.        ]
  [ 0.13577685]]]
[-0.2309367  -0.17797616 -0.23682454 ...  1.          0.13577685
  0.51640454]


In [3]:
# Build LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32)

2023-12-29 10:05:39.890325: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x128fdbfd0>

In [6]:
# Evaluate the model
predicted_price = model.predict(X_test)
predicted_price = scaler.inverse_transform(predicted_price)
real_price = scaler.inverse_transform(y_test.reshape(-1, 1))
last_price = scaler.inverse_transform(X_last_price_test[:, -1, :])

predicted_price_df = pd.DataFrame(predicted_price, columns=['predicted_price'])
real_price_df = pd.DataFrame(real_price, columns=['real_price'])
last_price_df = pd.DataFrame(last_price, columns=['last_price'])

compare_df = pd.concat([last_price_df, predicted_price_df, real_price_df], axis='columns')
compare_df['real_trend_up'] = (compare_df['real_price'] > 0)
compare_df['predicted_trend_up'] = (compare_df['predicted_price'] > 0)
compare_df['true_prediction'] = (compare_df['real_trend_up'] == compare_df['predicted_trend_up'])

accuracy = compare_df['true_prediction'].sum() / len(compare_df['true_prediction'])
print(f'accuracy: {accuracy}')

accuracy: 0.4857142857142857


In [7]:
compare_df.head(30)

Unnamed: 0,last_price,predicted_price,real_price,real_trend_up,predicted_trend_up,true_prediction
0,0.115858,0.011351,0.114861,True,True,True
1,0.114861,0.012281,-0.040454,False,True,False
2,-0.040454,0.009125,-0.014255,False,True,False
3,-0.014255,0.004825,-0.050769,False,True,False
4,-0.050769,0.000538,-0.03436,False,True,False
5,-0.03436,-0.002356,0.01712,True,False,False
6,0.01712,-0.002735,-0.050165,False,False,True
7,-0.050165,-0.003038,0.005559,True,False,False
8,0.005559,-0.002179,0.013822,True,False,False
9,0.013822,-0.000651,0.062713,True,False,False
