## MLP-based Values prediction 

### 1. Install required dependency libraries

In [1]:
"""
Install required dependency libraries
-pip install pandas
-pip install scikit-learn
-pip install tensorflow
-import matplotlib.pyplot as plt

"""

'\nInstall required dependency libraries\n-pip install pandas\n-pip install scikit-learn\n-pip install tensorflow\n-import matplotlib.pyplot as plt\n\n'

### 2. Import installed required dependency libraries

In [2]:
# Import required libraries for LSTM model
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping

### 3. Prepare the datset

In [3]:
# Load the CSV file into a pandas dataframe
dataframe = pd.read_csv("dataset.csv")

In [4]:
#To see the whole inforamtion contenet of the above dataset:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 248776 entries, 0 to 248775
Data columns (total 94 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   index                      248776 non-null  int64  
 1   open                       248776 non-null  float64
 2   high                       248776 non-null  float64
 3   close                      248776 non-null  float64
 4   low                        248776 non-null  float64
 5   timestamp                  248776 non-null  int64  
 6   transactions               248776 non-null  int64  
 7   volume                     248776 non-null  int64  
 8   volume_adi                 248776 non-null  float64
 9   volume_obv                 248776 non-null  int64  
 10  volume_cmf                 248776 non-null  float64
 11  volume_fi                  248776 non-null  float64
 12  volume_em                  248776 non-null  float64
 13  volume_sma_em              24

In [5]:
# To see the first five data content of the above dataset:
dataframe.head()

Unnamed: 0,index,open,high,close,low,timestamp,transactions,volume,volume_adi,volume_obv,...,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
0,0,1.30785,1.30805,1.30802,1.30776,1367884800000,471,471,373.551724,471,...,0.0,0.0,0.0,0.0,0.0,0.0,1.30802,12.7948,0.0,0.0
1,1,1.30803,1.3084,1.3083,1.30785,1367885700000,561,561,730.551724,1032,...,0.001708,0.000342,0.001366,1.503033,0.300607,1.202426,1.308143,0.021406,0.021404,0.021406
2,2,1.3083,1.30884,1.30781,1.3078,1367886600000,2309,2309,-1534.04443,-1277,...,3.8e-05,0.000281,-0.000243,24.809883,5.202462,19.607421,1.307998,-0.037453,-0.03746,-0.016055
3,3,1.30781,1.30836,1.3081,1.30771,1367887500000,1909,1909,-1152.24443,632,...,0.000498,0.000324,0.000173,32.724413,10.706852,22.017561,1.308042,0.022174,0.022172,0.006116
4,4,1.30809,1.30833,1.30819,1.30791,1367888400000,926,926,-843.577763,1558,...,0.001401,0.000539,0.000862,29.46035,14.457552,15.002799,1.308107,0.00688,0.00688,0.012997


In [6]:
# Drop the rows containing NaN values
dataframe = dataframe.dropna()

In [7]:
# Extract the time and target columns into separate variables
time = dataframe.iloc[:, 0].values
target = dataframe.iloc[:, 2:5].values

In [8]:
# Remove the time and target columns from the original dataframe to get the input data
inputs =dataframe.iloc[:, 5:].values

In [9]:
# Create a new dataframe with only the input data
new_dataframe = pd.DataFrame(inputs)

In [10]:
# Display the new dataframe
print(new_dataframe.head())

             0       1       2            3       4         5         6    
0  1.367885e+12   471.0   471.0   373.551724   471.0  0.793103  0.000000  \
1  1.367886e+12   561.0   561.0   730.551724  1032.0  0.707899  0.157080   
2  1.367887e+12  2309.0  2309.0 -1534.044430 -1277.0 -0.459157 -0.026990   
3  1.367888e+12  1909.0  1909.0 -1152.244430   632.0 -0.219475  0.055953   
4  1.367888e+12   926.0   926.0  -843.577763  1558.0 -0.136590  0.059865   

         7         8          9   ...        79        80        81   
0  0.000000  0.000000  60.271755  ...  0.000000  0.000000  0.000000  \
1  0.021569  0.021569  60.383598  ...  0.001708  0.000342  0.001366   
2  0.008783  0.015176  -0.744704  ...  0.000038  0.000281 -0.000243   
3 -0.009704  0.006883  -0.441483  ...  0.000498  0.000324  0.000173   
4  0.003855  0.006126   0.487021  ...  0.001401  0.000539  0.000862   

          82         83         84        85         86        87        88  
0   0.000000   0.000000   0.000000  1.

In [11]:
# Split the input data and target values into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(inputs, target, test_size=0.2, random_state=42)

In [12]:
# Normalize the input data using a scaler like the StandardScaler from sklearn.preprocessing
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Reshape the input data into a 3D tensor for use with the LSTM model
X_train_reshaped = np.reshape(X_train_scaled, (X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_reshaped = np.reshape(X_test_scaled, (X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

### 4. Define and build the LSTM-model

In [14]:
# Create an instance of the Sequential model from Keras
lstm_model = Sequential()

In [15]:
# Add an LSTM layer with 128 units and input shape of (1, 28)
lstm_model.add(LSTM(units=128, input_shape=(1, 28)))
# Add a fully connected dense layer with 64 units
lstm_model.add(Dense(units=64, activation='relu'))
# Add another dropout layer
lstm_model.add(Dropout(0.2))
# Add a fully connected dense layer with 32 units
lstm_model.add(Dense(units=32, activation='relu'))
# Add an output layer with 3 units (one for each target variable)
lstm_model.add(Dense(units=3))

In [16]:
# Compile the model using the 'adam' optimizer and mean squared error loss function
lstm_model.compile(optimizer='adam', loss='mse')

In [17]:
# Define early stopping criteria
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

### 5. Train the LSTM-model

In [18]:
# Train the model on the training dataset
lstm_model.fit(X_train_reshaped, y_train, epochs=20, batch_size=32, validation_data=(X_test_reshaped, y_test), callbacks=[early_stopping])

Epoch 1/20


ValueError: in user code:

    File "d:\AI\Value_prediction\value_pred\lib\site-packages\keras\engine\training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "d:\AI\Value_prediction\value_pred\lib\site-packages\keras\engine\training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "d:\AI\Value_prediction\value_pred\lib\site-packages\keras\engine\training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "d:\AI\Value_prediction\value_pred\lib\site-packages\keras\engine\training.py", line 1050, in train_step
        y_pred = self(x, training=True)
    File "d:\AI\Value_prediction\value_pred\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "d:\AI\Value_prediction\value_pred\lib\site-packages\keras\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 1, 28), found shape=(None, 1, 89)


### 6. Evaluate the trained model

In [None]:
# Use the trained model to make predictions on the testing dataset
y_pred = lstm_model.predict(X_test_reshaped)



In [None]:
# Create a new scaler for the output features
output_scaler = StandardScaler()
output_scaler.fit(target)
# Invert the scaling of the predictions and actual values
y_pred = output_scaler.inverse_transform(y_pred)
y_test = output_scaler.inverse_transform(y_test)

In [None]:
# Evaluate the performance of the model using metrics like mean squared error, mean absolute error, and R-squared
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [None]:
print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

Mean Squared Error: 1.1315369983738836e-06
Mean Absolute Error: 0.0008963336806977938
R-squared: 0.8851579454913193


### 7. Save the model

In [None]:
# Save the model
lstm_model.save("lstm2_model.h5")