In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout
import os
import shutil
from utils_plot import print_plot_prediction_waves
from dataProcessor import MarketDataProcessor
from utils_plot import print_plot_model_loss
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar

# Libraries that will help us extract only business days in the US.
# Otherwise our dates would be wrong when we look back (or forward).
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Define the path to the data folder
data_folder = 'data/learn'
output_folder = 'training_plots'  # Specify the output folder

# Clean the output folder before each run
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)  # Remove the folder and its contents

# Create the output folder
os.makedirs(output_folder)

# Get a list of all files in the data folder
file_list = [filename for filename in os.listdir(data_folder) if filename.endswith('.csv')]

# Loop through each file
for filename in file_list:
    file_path = os.path.join(data_folder, filename)

    data = MarketDataProcessor(file_path)

    # Load model weights if available
    try:
        model = load_model('a_model')
        print("Model loaded successfully.")
    except:
        print("No model found. Training from scratch.")
        model = Sequential()
        model.add(LSTM(64, activation='relu', input_shape=(
                    data.trainX.shape[1], data.trainX.shape[2]), return_sequences=True))
        model.add(LSTM(32, activation='relu', return_sequences=False))
        model.add(Dropout(0.2))
        model.add(Dense(units=5, activation='sigmoid'))  # Output layer with 5 units for Wave1 to Wave5
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        """     model = Sequential()
        model.add(LSTM(units=64, activation='relu', input_shape=(data.n_past, 5)))  # Adjust input_shape here
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=5, activation='sigmoid'))  # Output layer with 5 units for Wave1 to Wave5
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) """

    model.summary()

    # Train the model
    #history = model.fit(data.trainX, data.trainY, epochs=250, batch_size=5, validation_split=0.1, verbose=1)
    # fit the model
    model.save('a_model')

    # Save the plot as an image file in the output folder
    output_filename = os.path.join(output_folder, os.path.splitext(filename)[0] + '_plot.png')
    #print_plot_model_loss(output_filename, history.history['loss'], history.history['val_loss'])

    # Evaluate the model on the test data
    #result = model.evaluate(X_test_sequences, y_test_sequences, verbose=1)
    #print("Evaluation result:", result)
    #loss, accuracy = model.evaluate(X_test_sequences, y_test_sequences, verbose=1)
    #print(f'Test loss: {loss:.4f}, Test accuracy: {accuracy:.4f}')

    
    n_days_for_prediction = len(data.original_data) - 1

    # Make prediction using the model
    prediction = model.predict(data.trainX[-n_days_for_prediction:])

     # Inverse transform using the original scaler
    transformer_predicted = data.scaler.inverse_transform(prediction)
    print('transformer_predicted', transformer_predicted)

    # Determine the wave with the largest index for each row
    max_wave_indices = np.argmax(transformer_predicted, axis=1)

    # Generate future prediction dates
    train_dates = data.original_data['Date']
    predict_period_dates = pd.date_range(
        list(train_dates)[-n_days_for_prediction], periods=n_days_for_prediction, freq=us_bd).tolist()

    # Convert timestamp to date
    forecast_dates = []
    for period in predict_period_dates:
        forecast_dates.append(period.date())

    prediction_columns = ['Wave 1', 'Wave 2', 'Wave 3', 'Wave 4', 'Wave 5']

    print(len(forecast_dates))
    print(len(transformer_predicted[:, 0]))
    print(len(transformer_predicted[:, 1]))

        # Create a DataFrame for the forecasted values
    df_forecast = pd.DataFrame({
        'Date': forecast_dates[:len(transformer_predicted[:, 0])],
        'Wave 1': transformer_predicted[:, 0],
        'Wave 2': transformer_predicted[:, 1],
        'Wave 3': transformer_predicted[:, 2],
        'Wave 4': transformer_predicted[:, 3],
        'Wave 5': transformer_predicted[:, 4],
        'Largest_Wave': np.array(prediction_columns)[max_wave_indices]
    })

    # Filter original data to keep only dates after '2023-01-01'
    df_original = data.original_data[-n_days_for_prediction:].copy()

    # Convert the 'Date' column to datetime
    df_original['Date'] = pd.to_datetime(df_original['Date'])
    df_forecast['Date'] = pd.to_datetime(df_forecast['Date'])
    
    # Get the first date from df_forecast
    first_forecast_date = df_forecast['Date'].iloc[0]

    # Save the plot as an image file in the output folder
    output_filename_close = os.path.join(output_folder, os.path.splitext(filename)[0] + '_ew_plot.png')
    print_plot_prediction_waves(output_filename_close, df_original, df_forecast)
 



Model loaded successfully.
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 5, 64)             17920     
                                                                 
 lstm_3 (LSTM)               (None, 32)                12416     
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 5)                 165       
                                                                 
Total params: 30501 (119.14 KB)
Trainable params: 30501 (119.14 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
INFO:tensorflow:Assets written to: a_model\assets


INFO:tensorflow:Assets written to: a_model\assets


transformer_predicted [[   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         185.73685    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.91667    178.74416    183.8769  101151.29   ]
 [   170.         174.9166

<Figure size 1200x600 with 0 Axes>