# LSTM with 'y' and 'red' as input and output

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.losses import mean_squared_error, binary_crossentropy
from utils import *

# Function to process multiple dataframes
def process_multiple_dataframes(data_list):
    """Build one-step-ahead samples from several dataframes.

    For each dataframe, row ``t`` of the features holds ``(y[t], red[t])``
    and the matching targets are ``y[t + 1]`` and ``red[t + 1]``. Pairs are
    formed inside each dataframe only, so no sample straddles two
    dataframes. The per-dataframe results are then concatenated in the
    order the dataframes were given.

    Args:
        data_list: Iterable of dataframes, each with a ``'y'`` and a
            ``'red'`` column.

    Returns:
        A tuple ``(X, y_regression, y_classification)`` where ``X`` has
        shape ``(n_samples, 2)`` and both targets have shape
        ``(n_samples,)``. When ``data_list`` is empty, empty arrays of
        those shapes are returned instead of raising.
    """
    feature_chunks = []
    regression_chunks = []
    classification_chunks = []
    for df in data_list:
        y_values = df['y'].values
        red_values = df['red'].values
        # Drop the last row of the inputs and the first row of the targets
        # so that input row t lines up with target row t + 1.
        feature_chunks.append(np.column_stack((y_values, red_values))[:-1])
        regression_chunks.append(y_values[1:])
        classification_chunks.append(red_values[1:])

    # np.concatenate refuses an empty list, so handle "no dataframes" here.
    if not feature_chunks:
        return np.empty((0, 2)), np.empty((0,)), np.empty((0,))

    return (
        np.concatenate(feature_chunks),
        np.concatenate(regression_chunks),
        np.concatenate(classification_chunks),
    )

# Parameters
path = "spot-the-trend-train-data"
data_count = 4

# Read and process data.
# NOTE(review): read_data is provided by `utils` (not visible here); confirm
# what the third positional argument (True) switches on.
data = read_data(path, data_count, True)
X, y_regression, y_classification = process_multiple_dataframes(data)

# Reshape X for LSTM input: (samples, features) -> (samples, timesteps=1, features)
X = X.reshape((X.shape[0], 1, X.shape[1]))

# Split the data into training and testing sets.
# shuffle=False keeps the rows in their original order, so the held-out 20% is
# the contiguous tail of the data. The prediction cell feeds each test
# prediction into the *next* test row, which is only meaningful when
# consecutive test rows are consecutive time steps; a shuffled split breaks
# that assumption. random_state is omitted because nothing is randomised.
(X_train, X_test,
 y_regression_train, y_regression_test,
 y_classification_train, y_classification_test) = train_test_split(
    X, y_regression, y_classification, test_size=0.2, shuffle=False)

# Define the LSTM model: one shared trunk feeding two separate output layers.
# The input shape is read from X so it cannot drift from the reshape above.
input_layer = Input(shape=X.shape[1:])
lstm_out = LSTM(20, activation='relu')(input_layer)
dense_1 = Dense(20, activation='relu')(lstm_out)

# Linear head for the next 'y' value, sigmoid head for the next 'red' value.
output_regression = Dense(1, name='regression_output')(dense_1)
output_classification = Dense(1, activation='sigmoid', name='classification_output')(dense_1)

model = Model(inputs=input_layer, outputs=[output_regression, output_classification])

# Define precision and recall metrics
precision = Precision(name='precision')
recall = Recall(name='recall')

# Compile the model with separate losses and metrics.
# The dict keys must match the `name=` given to each output layer above.
model.compile(optimizer=Adam(),
              loss={'regression_output': mean_squared_error, 'classification_output': binary_crossentropy},
              metrics={'classification_output': ['accuracy', precision, recall]})

# Train the model, reporting validation metrics on the held-out split each epoch.
model.fit(
    X_train,
    {'regression_output': y_regression_train, 'classification_output': y_classification_train},
    epochs=5,
    batch_size=32,
    validation_data=(
        X_test,
        {'regression_output': y_regression_test, 'classification_output': y_classification_test},
    ),
)



uopfk.csv
mlhdp.csv
ogfyy.csv
pltbp.csv




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x10982e6d0>

In [4]:
# Save the entire model to a single file, 'LSTM.h5'.
# NOTE: the '.h5' suffix selects Keras' HDF5 file format, not the SavedModel
# directory format.
model.save('LSTM.h5')

In [None]:
# Reload the model from 'LSTM.h5', rebinding `model`.
# load_model is imported from tensorflow.keras (not standalone keras) so it
# comes from the same package as the other Keras imports in this file.
from tensorflow.keras.models import load_model

model = load_model('LSTM.h5')

# Prediction

In [5]:
# Predicting with the model using an iterative approach for test data:
# each step's predicted classification replaces the 'red' feature of the
# next test row before that row is predicted.
predictions_regression = []
predictions_classification = []

# Work on a copy so the feedback writes below do not overwrite X_test itself.
rollout_inputs = X_test.copy()
n_samples = rollout_inputs.shape[0]

for i in range(n_samples):
    # Slicing i:i+1 keeps the leading batch axis, giving a batch of one sample.
    current_input = rollout_inputs[i:i + 1]
    predicted_regression, predicted_classification = model.predict(current_input)
    predictions_regression.append(predicted_regression[0][0])
    predictions_classification.append(predicted_classification[0][0])
    if i < n_samples - 1:
        # Use the predicted classification for the next step
        rollout_inputs[i + 1, 0, 1] = predicted_classification[0][0]

# Evaluate the predictions.
# This must be a NumPy array: comparing a plain list with `== 1` yields the
# single value False instead of an element-wise mask, which made precision
# 0/0 and recall always 0.
predictions_classification_binary = (np.array(predictions_classification) > 0.5).astype(int)

# Calculate evaluation metrics for regression
mse = np.mean((y_regression_test - np.array(predictions_regression)) ** 2)

# Calculate evaluation metrics for classification
actual_positive = y_classification_test == 1
predicted_positive = predictions_classification_binary == 1
true_positives = np.sum(actual_positive & predicted_positive)

accuracy = np.mean(y_classification_test == predictions_classification_binary)
# NOTE: `precision` and `recall` rebind names that previously held the Keras
# metric objects passed to model.compile; from here on they are plain numbers.
# Both fall back to 0.0 when their denominator would be zero.
precision = true_positives / np.sum(predicted_positive) if np.any(predicted_positive) else 0.0
recall = true_positives / np.sum(actual_positive) if np.any(actual_positive) else 0.0

print(f'Regression MSE: {mse}')
print(f'Classification Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}')



In [None]:

# For each dataframe in prediction_dfs: print the result of df_to_intervals,
# then pass the dataframe to visualize_well.
# NOTE(review): prediction_dfs is not defined in the visible part of this
# file, and df_to_intervals / visualize_well presumably come from
# `from utils import *` -- confirm both before running this cell.
for df in prediction_dfs:
    interval = df_to_intervals(df)
    print(interval)
    visualize_well(df)