In [1]:
import os
import numpy as np
import seaborn as sns
from tensorflow.keras.models import load_model
from dataProcessor import MarketDataProcessor
import shutil  # Import the shutil module for file operations
import pandas as pd

from utils_plot import print_plot_prediction_waves
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar


# Define the path to the data folder
data_folder = 'data/predict'
output_folder = 'generated_plots'  # Specify the output folder

# Get a list of all files in the data folder
file_list = [filename for filename in os.listdir(data_folder) if filename.endswith('.csv')]

# Clean the output folder before each run
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)  # Remove the folder and its contents

# Create the output folder
os.makedirs(output_folder)


# Libraries that will help us extract only business days in the US.
# Otherwise our dates would be wrong when we look back (or forward).
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Remember that we can only predict one day in the future as our model needs 8 variables
# as inputs for prediction. We only have all 8 variables until the last day in our dataset.
prediction_columns = ['Wave 1', 'Wave 2', 'Wave 3', 'Wave 4', 'Wave 5']




# Loop through each file
for filename in file_list:
    file_path = os.path.join(data_folder, filename)
    
    dataProcessor = MarketDataProcessor(file_path)

    # Load model
    model = load_model('a_model')

    print("Predicting...")
    # Predicting...

    n_days_for_prediction = 3000

    # Make prediction using the model
    prediction = model.predict(dataProcessor.trainX[-n_days_for_prediction:])

     # Inverse transform using the original scaler
    transformer_predicted = dataProcessor.scaler.inverse_transform(prediction)
    print('transformer_predicted', transformer_predicted)

    # Determine the wave with the largest index for each row
    max_wave_indices = np.argmax(transformer_predicted, axis=1)

    # Generate future prediction dates
    train_dates = dataProcessor.original_data['Date']
    predict_period_dates = pd.date_range(
        list(train_dates)[-n_days_for_prediction], periods=n_days_for_prediction, freq=us_bd).tolist()

    # Convert timestamp to date
    forecast_dates = []
    for period in predict_period_dates:
        forecast_dates.append(period.date())

    # Create a DataFrame for the forecasted values
    df_forecast = pd.DataFrame({
        'Date': forecast_dates,
        'Wave 1': transformer_predicted[:, 0],
        'Wave 2': transformer_predicted[:, 1],
        'Wave 3': transformer_predicted[:, 2],
        'Wave 4': transformer_predicted[:, 3],
        'Wave 5': transformer_predicted[:, 4],
        'Largest_Wave': np.array(prediction_columns)[max_wave_indices]
    })

    # Filter original data to keep only dates after '2023-01-01'
    #df_original = dataProcessor.original_data[dataProcessor.original_data['Date'] >= '2023-01-01'].copy()
    df_original = dataProcessor.original_data[-n_days_for_prediction:].copy()

    # Convert the 'Date' column to datetime
    df_original['Date'] = pd.to_datetime(df_original['Date'])
    df_forecast['Date'] = pd.to_datetime(df_forecast['Date'])
    
    # Get the first date from df_forecast
    first_forecast_date = df_forecast['Date'].iloc[0]

    # Save the plot as an image file in the output folder
    output_filename_close = os.path.join(output_folder, os.path.splitext(filename)[0] + '_ew_plot.png')
    print_plot_prediction_waves(output_filename_close, df_original, df_forecast)
 
 

Predicting...
transformer_predicted [[1.8820719e+01 1.9032335e+01 5.7577389e+01 5.8249241e+01 6.6834125e+08]
 [1.8820719e+01 1.9032335e+01 5.7577389e+01 5.8249241e+01 6.6834125e+08]
 [1.8820719e+01 1.9032335e+01 5.7577389e+01 5.8249241e+01 6.6834125e+08]
 ...
 [1.8820719e+01 1.9032335e+01 5.7577389e+01 5.8249241e+01 6.6834125e+08]
 [1.8820719e+01 1.9032335e+01 5.7577389e+01 5.8249241e+01 6.6834125e+08]
 [1.8820719e+01 1.9032335e+01 5.7577389e+01 5.8249241e+01 6.6834125e+08]]
Predicting...
transformer_predicted [[1.7055115e+01 1.7715139e+01 3.9981010e+01 4.0853256e+01 4.6849912e+07]
 [1.7055115e+01 1.7715139e+01 3.9981010e+01 4.0853256e+01 4.6849912e+07]
 [1.7055115e+01 1.7715139e+01 3.9981010e+01 4.0853256e+01 4.6849912e+07]
 ...
 [1.7055115e+01 1.7715139e+01 3.9981010e+01 4.0853256e+01 4.6849912e+07]
 [1.7055115e+01 1.7715139e+01 3.9981010e+01 4.0853256e+01 4.6849912e+07]
 [1.7055115e+01 1.7715139e+01 3.9981010e+01 4.0853256e+01 4.6849912e+07]]
Predicting...
transformer_predicted [[8.

IndexError: list index out of range

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>