In [2]:
import os
import numpy as np
import seaborn as sns
from sklearn.discriminant_analysis import StandardScaler
from tensorflow.keras.models import load_model
from dataProcessor import MarketDataProcessor
import shutil  # Import the shutil module for file operations
import pandas as pd

from utils_plot import print_plot_prediction_close
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar


# Where the input CSV files are read from and where the plots are written to.
data_folder = 'data/predict'
output_folder = 'generated_plots'

# Business-day offset that skips weekends and US federal holidays; used when
# generating the date axis for the predictions.
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# How many trailing samples are fed to the network per file, and therefore
# how many dated prediction rows are produced.
n_days_for_prediction = 100

# Every entry of the data folder whose name ends in '.csv' gets processed.
file_list = list(filter(lambda entry: entry.endswith('.csv'), os.listdir(data_folder)))

# Start each run from an empty output folder: drop whatever a previous run
# left behind, then create the folder again.
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)
os.makedirs(output_folder)

def transform_prediction(prediction, dataProcessor, n_days_for_pred):
    """Convert scaled model output into a dated DataFrame of close values.

    The prediction is padded with placeholder columns so that its width
    matches what ``dataProcessor.scaler`` expects, passed through the
    scaler's ``inverse_transform``, and the first resulting column is kept.

    Args:
        prediction: 2-D array-like of scaled model outputs, one row per
            predicted day.
        dataProcessor: Object exposing ``scaler`` (with an
            ``inverse_transform`` method) and ``original_data`` (a DataFrame
            with a ``'Date'`` column).
        n_days_for_pred: Number of dates to generate. The date axis starts
            at the ``n_days_for_pred``-th entry from the end of
            ``original_data['Date']``.

    Returns:
        A DataFrame with a ``'Date'`` column (``datetime.date`` objects) and
        a ``'Close'`` column holding the inverse-transformed first column.

    Raises:
        ValueError: If the prediction has more columns than the scaler
            reports having been fitted on.
    """
    prediction = np.asarray(prediction)
    scaler = dataProcessor.scaler

    # The scaler inverts a full feature row, so the prediction has to be
    # widened to the scaler's feature count. Ask the scaler for that count
    # when it exposes one (scikit-learn's `n_features_in_`); otherwise fall
    # back to the previous fixed assumption of 7 extra columns.
    n_features = getattr(scaler, 'n_features_in_', None)
    if n_features is None:
        n_features = prediction.shape[1] + 7
    n_padding = n_features - prediction.shape[1]
    if n_padding < 0:
        raise ValueError(
            f"prediction has {prediction.shape[1]} columns but the scaler "
            f"expects only {n_features} features"
        )

    # The padding values are throwaway: only column 0 of the inverse
    # transform is used below. Copies of the first prediction column are
    # used as filler.
    dummy_columns = np.repeat(prediction[:, :1], n_padding, axis=1)
    prediction_with_dummy = np.concatenate((prediction, dummy_columns), axis=1)

    # Inverse transform using the original scaler
    transformer_predicted = scaler.inverse_transform(prediction_with_dummy)
    print('transformer_predicted', transformer_predicted)
    # NOTE(review): assumes column 0 of the scaler's features is the close
    # price -- confirm against MarketDataProcessor.
    y_pred_future_close = transformer_predicted[:, 0]

    # Build the date axis: n_days_for_pred business days (per the file-level
    # `us_bd` offset), starting n_days_for_pred entries before the end of
    # the original dates.
    train_dates = dataProcessor.original_data['Date']
    predict_period_dates = pd.date_range(
        train_dates.iloc[-n_days_for_pred], periods=n_days_for_pred, freq=us_bd)

    # Convert timestamps to plain dates
    forecast_dates = [period.date() for period in predict_period_dates]

    return pd.DataFrame({
        'Date': forecast_dates,
        'Close': y_pred_future_close,
    })


# Load the trained model once up front: it is the same for every input file,
# so reloading it on each iteration only costs time. Loading is skipped when
# there is nothing to process, so an empty data folder still loads nothing.
model = load_model('a_model') if file_list else None

# Produce one close-price plot per CSV file found in the data folder.
for filename in file_list:
    file_path = os.path.join(data_folder, filename)

    dataProcessor = MarketDataProcessor(file_path)

    print("Predicting...")

    # Run the model on the last n_days_for_prediction samples of trainX and
    # turn the scaled output into a dated DataFrame of close values.
    prediction = model.predict(dataProcessor.trainX[-n_days_for_prediction:])
    df_forecast = transform_prediction(prediction, dataProcessor, n_days_for_prediction)

    # Keep only original rows dated on or after '2023-01-01'. The copy keeps
    # the date conversion below from writing into the processor's own frame.
    df_original = dataProcessor.original_data[dataProcessor.original_data['Date'] >= '2023-01-01'].copy()

    # Give both frames a datetime 'Date' column
    df_original['Date'] = pd.to_datetime(df_original['Date'])
    df_forecast['Date'] = pd.to_datetime(df_forecast['Date'])

    # First date of the forecast. Not read again in the visible code; kept
    # because it is a module-level name that later cells may rely on.
    first_forecast_date = df_forecast['Date'].iloc[0]

    # Save the plot as an image file in the output folder
    output_filename_close = os.path.join(output_folder, os.path.splitext(filename)[0] + '_close_plot.png')
    print_plot_prediction_close(output_filename_close, df_original, df_forecast, df_forecast, 'Close')
 
 

Predicting...
transformer_predicted [[1.8897519  1.8963889  1.8811449  1.889552   0.8962903  0.83839715
  0.0434241  0.7925806 ]
 [1.9111553  1.9178675  1.9024508  1.9109533  0.90644175 0.8478929
  0.04453673 0.8128835 ]
 [1.9327648  1.9395529  1.9239619  1.9325604  0.9166909  0.85748
  0.04566007 0.8333818 ]
 [1.9243766  1.9311353  1.9156119  1.9241731  0.91271245 0.8537585
  0.04522402 0.8254249 ]
 [1.9342166  1.9410098  1.9254069  1.9340119  0.91737944 0.8581241
  0.04573554 0.8347589 ]
 [1.9220437  1.9287941  1.9132895  1.9218404  0.91160595 0.8527235
  0.04510275 0.8232119 ]
 [1.9057485  1.9124417  1.8970686  1.9055469  0.9038773  0.8454941
  0.04425566 0.80775464]
 [1.8854692  1.8920912  1.8768816  1.8852698  0.89425904 0.83649707
  0.04320147 0.7885181 ]
 [1.8614379  1.8679755  1.8529598  1.861241   0.88286126 0.82583547
  0.04195224 0.7657225 ]
 [1.866963   1.87352    1.8584598  1.8667656  0.8854818  0.8282867
  0.04223946 0.77096355]
 [1.8866328  1.8932588  1.8780398  1.886433

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>