In [1]:
import os
import numpy as np
import seaborn as sns
from tensorflow.keras.models import load_model
from dataProcessor import MarketDataProcessor
import shutil  # Import the shutil module for file operations
import pandas as pd

from utils_plot import print_plot_prediction_close
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar


# Input CSVs are read from `data_folder`; rendered plots go to `output_folder`.
data_folder = 'data/predict'
output_folder = 'generated_plots'

# Names of every CSV file found directly inside the data folder.
file_list = [name for name in os.listdir(data_folder) if name.endswith('.csv')]

# Start each run from an empty output folder: drop any previous one (with its
# contents), then recreate it.
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)
os.makedirs(output_folder)

# Business-day offset that skips weekends and US federal holidays, so that
# generated forecast dates do not land on those days.
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Window settings for the two forecasts drawn on each plot.
# n_past / n_past_2: how many rows before the end of the data each input
# window stops (and how far back from the last date its forecast dates start).
n_past, n_past_2 = 5, 10
# Number of samples fed to the network for each of the two forecasts.
n_days_for_prediction = n_days_for_prediction_2 = 5

def transform_prediction(prediction, dataProcessor, n_days_for_pred, n_days_past, freq=None):
    """Turn scaled model output into a DataFrame of dated 'Close' forecasts.

    The scaler held by ``dataProcessor`` works on full-width feature rows, so
    the prediction is right-padded with zero columns up to the scaler's
    feature count before ``inverse_transform`` is applied. Only column 0 of
    the un-scaled result is kept and returned as 'Close'.

    Args:
        prediction: 2-D array of scaled model outputs; it must have
            ``n_days_for_pred`` rows so that each row gets a forecast date.
        dataProcessor: object exposing ``scaler`` (with ``inverse_transform``)
            and ``original_data`` (a DataFrame with a 'Date' column).
        n_days_for_pred: number of forecast dates to generate.
        n_days_past: the forecast dates start at the ``n_days_past``-th entry
            counted from the end of ``original_data['Date']``.
        freq: optional frequency passed to ``pd.date_range``. Defaults to the
            module-level ``us_bd`` business-day offset.

    Returns:
        DataFrame with a 'Date' column (``datetime.date`` values) and a
        'Close' column (un-scaled predictions).

    Raises:
        ValueError: if ``prediction`` has more columns than the scaler
            handles, or its row count does not match ``n_days_for_pred``.
    """
    if freq is None:
        freq = us_bd

    scaler = dataProcessor.scaler
    # Prefer the width the scaler reports (scikit-learn scalers expose
    # ``n_features_in_`` after fitting); fall back to the 7 features this
    # script was written for.
    n_features = getattr(scaler, 'n_features_in_', 7)

    num_missing_features = n_features - prediction.shape[1]
    if num_missing_features < 0:
        raise ValueError(
            f"prediction has {prediction.shape[1]} columns but the scaler "
            f"handles only {n_features} features"
        )

    # Pad with zeros so the array matches the scaler's expected width.
    dummy_columns = np.zeros((prediction.shape[0], num_missing_features))
    prediction_with_dummy = np.concatenate((prediction, dummy_columns), axis=1)

    # Undo the scaling; the padded columns are discarded right after.
    transformer_predicted = scaler.inverse_transform(prediction_with_dummy)
    y_pred_future_close = transformer_predicted[:, 0]

    # Anchor the forecast window on a date taken from the original data.
    train_dates = dataProcessor.original_data['Date']
    start_date = train_dates.iloc[-n_days_past]

    predict_period_dates = pd.date_range(start_date, periods=n_days_for_pred, freq=freq)

    # Plain dates (no time component) for the output frame.
    forecast_dates = [period.date() for period in predict_period_dates]

    return pd.DataFrame({
        'Date': forecast_dates,
        'Close': y_pred_future_close,
    })


# Load the trained model once: it does not depend on the file being processed.
# Skipped when there are no input files, so an empty data folder still does
# not require the model to be present.
model = load_model('a_model') if file_list else None

# Produce one 'Close' prediction plot per input CSV.
for filename in file_list:
    file_path = os.path.join(data_folder, filename)

    dataProcessor = MarketDataProcessor(file_path)

    print("Predicting...")

    # Two input windows of trainX: each holds n_days_for_prediction(_2)
    # samples and stops n_past(_2) samples before the end of the data.
    prediction = model.predict(dataProcessor.trainX[-n_days_for_prediction - n_past:-n_past])
    prediction_2 = model.predict(dataProcessor.trainX[-n_days_for_prediction_2 - n_past_2:-n_past_2])

    # Un-scale the predictions and attach forecast dates.
    df_forecast = transform_prediction(prediction, dataProcessor, n_days_for_prediction, n_past)
    df_forecast_2 = transform_prediction(prediction_2, dataProcessor, n_days_for_prediction_2, n_past_2)

    # Keep only original rows dated on or after '2023-01-01'.
    df_original = dataProcessor.original_data[dataProcessor.original_data['Date'] >= '2023-01-01'].copy()

    # Normalise all 'Date' columns to datetime so they compare and plot consistently.
    df_original['Date'] = pd.to_datetime(df_original['Date'])
    df_forecast['Date'] = pd.to_datetime(df_forecast['Date'])
    df_forecast_2['Date'] = pd.to_datetime(df_forecast_2['Date'])

    # Cut the original series at the first date of the first forecast, so the
    # plotted history ends where that forecast begins.
    first_forecast_date = df_forecast['Date'].iloc[0]
    df_original = df_original[df_original['Date'] <= first_forecast_date]

    # Save the plot as '<csv name without extension>_close_plot.png' in the output folder.
    file_stem = os.path.splitext(filename)[0]
    output_filename_close = os.path.join(output_folder, file_stem + '_close_plot.png')
    print_plot_prediction_close(output_filename_close, df_original, df_forecast, df_forecast_2, 'Close')
 
 

trainx [[[ 0.75074757  0.73084153  0.75539035 ... -0.36739044 -0.37953043
   -1.10334242]
  [ 0.73859446  0.71976239  0.72809474 ... -0.09176559 -1.05497918
   -1.10334242]
  [ 0.7117305   0.73591866  0.73405022 ...  0.08322789  1.94526263
    0.99357944]
  ...
  [ 0.91141031  0.89215234  0.90319841 ... -0.12598295 -0.94347157
   -1.10334242]
  [ 0.87825863  0.87008063  0.89522748 ... -0.36786193  0.22720214
    0.99357944]
  [ 0.88218599  1.13456722  0.90479497 ...  3.99552447  0.05712961
    0.99357944]]

 [[ 0.73859446  0.71976239  0.72809474 ... -0.09176559 -1.05497918
   -1.10334242]
  [ 0.7117305   0.73591866  0.73405022 ...  0.08322789  1.94526263
    0.99357944]
  [ 0.74684784  0.74811949  0.7644599  ... -0.22627752  0.72945489
    0.99357944]
  ...
  [ 0.87825863  0.87008063  0.89522748 ... -0.36786193  0.22720214
    0.99357944]
  [ 0.88218599  1.13456722  0.90479497 ...  3.99552447  0.05712961
    0.99357944]
  [ 0.88218599  0.90715149  0.90676301 ...  0.06995856  1.83797169

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>