In [18]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from sklearn.preprocessing import MinMaxScaler
import joblib  # For saving the scaler
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Example usage
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import MinMaxScaler
import numpy as np

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pickle


In [25]:
def preprocess_stock_data(
    csv_path, 
    date_column='Date', 
    close_column='Close', 
    test_size=0.2, 
    time_steps=60, 
    scaler_output_file='scaler.pkl'
):
    """
    Prepares stock market price data for time series modeling, with training and test split.

    Args:
        csv_path (str): Path to the CSV file containing the data.
        date_column (str): Name of the date column in the CSV.
        close_column (str): Name of the closing price column in the CSV.
        test_size (float): Proportion of the data for testing.
        time_steps (int): Number of past time steps to use for each sample.
        scaler_output_file (str): Path to save the fitted MinMaxScaler instance.

    Returns:
        X_train (numpy.ndarray): Training data (features).
        y_train (numpy.ndarray): Training labels.
        X_test (numpy.ndarray): Testing data (features).
        y_test (numpy.ndarray): Testing labels.
    """
    try:
        # Load the dataset
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"CSV file not found at path: {csv_path}")

        data = pd.read_csv(csv_path, parse_dates=[date_column])
        data.sort_values(by=date_column, inplace=True)

        print(f"CSV file successfully loaded. Columns: {data.columns}")
    except Exception as e:
        raise ValueError(f"Error reading CSV file or parsing date column '{date_column}': {e}")
    
    # Validate columns
    if date_column not in data.columns or close_column not in data.columns:
        raise ValueError(f"Columns '{date_column}' or '{close_column}' not found in dataset. Available columns: {data.columns}")

    try:
        # Extract the 'close' column for scaling
        close_prices = data[close_column].values.reshape(-1, 1)
    except Exception as e:
        raise ValueError(f"Error accessing '{close_column}' column: {e}")
    
    # Scale the data
    try:
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_close = scaler.fit_transform(close_prices)

        # Save the scaler to a file
        with open(scaler_output_file, 'wb') as f:
            pickle.dump(scaler, f)
        print(f"Scaler saved to {scaler_output_file}")
    except Exception as e:
        raise IOError(f"Error during scaling or saving scaler: {e}")
    
    # Create sequences of time_steps
    X, y = [], []
    try:
        for i in range(time_steps, len(scaled_close)):
            X.append(scaled_close[i - time_steps:i])
            y.append(scaled_close[i])
    except Exception as e:
        raise ValueError(f"Error creating sequences of time_steps: {e}")
    
    X, y = np.array(X), np.array(y)
    
    # Split data into train and test sets
    try:
        train_size = int((1 - test_size) * len(X))
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]
    except Exception as e:
        raise ValueError(f"Error splitting data into train and test sets: {e}")
    
    print(f"Data split: {len(X_train)} training samples, {len(X_test)} testing samples")
    
    return X_train, y_train, X_test, y_test





In [27]:
data = "inputs/google_stock_cleaned.csv"


try:
    X_train, y_train, X_test, y_test = preprocess_stock_data(
        data, 
        date_column='Date', 
        close_column='Close', 
        test_size=0.2, 
        time_steps=60, 
        scaler_output_file='scaler.pkl'
    )

    # Outputs
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
except ValueError as ve:
    print(f"ValueError: {ve}")
except Exception as e:
    print(f"Unexpected Error: {e}")


CSV file successfully loaded. Columns: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'daily_return', '20_day_ma', '50_day_ma'],
      dtype='object')
Scaler saved to scaler.pkl
Data split: 554 training samples, 139 testing samples
X_train shape: (554, 60, 1), y_train shape: (554, 1)
X_test shape: (139, 60, 1), y_test shape: (139, 1)


In [39]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

X_train shape: (554, 60, 1)
y_train shape: (554, 1)


In [36]:
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
import tensorflow as tf
import joblib

import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
import tensorflow as tf
import joblib

class StockPricePredictor_Google:
    def __init__(self, input_shape, learning_rate=0.0007971184552975506, num_layers=2, units=256):
        self.input_shape = input_shape  # Should be (time_steps, features)
        self.learning_rate = learning_rate
        self.num_layers = num_layers
        self.units = units
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        for i in range(self.num_layers):
            # Add RNN layers
            return_sequences = i < (self.num_layers - 1)
            model.add(SimpleRNN(self.units, activation='relu', return_sequences=return_sequences, input_shape=self.input_shape))
            model.add(Dropout(0.2))  # Regularization

        # Output layer for regression (single output: close value)
        model.add(Dense(1))

        # Compile the model
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
            loss=tf.keras.losses.MeanSquaredError(),  # Explicitly use TensorFlow's implementation
            metrics=[tf.keras.metrics.MeanAbsoluteError()]  # Use TensorFlow's metric function
        )
        return model

    def train(self, X_train, y_train, batch_size=16, epochs=50, validation_data=None):
        # Validate and reshape the training data
        X_train = self._validate_and_reshape(X_train, expected_shape=(None, *self.input_shape))
        if validation_data:
            X_val = self._validate_and_reshape(validation_data[0], expected_shape=(None, *self.input_shape))
            validation_data = (X_val, validation_data[1])

        # Train the model
        history = self.model.fit(
            X_train, y_train, 
            batch_size=batch_size, 
            epochs=epochs, 
            validation_data=validation_data,
            verbose=2
        )
        return history

    def predict(self, X):
        # Validate and reshape the input data
        X = self._validate_and_reshape(X, expected_shape=(None, *self.input_shape))
        return self.model.predict(X)

    def save(self, model_path, scaler_path, scaler):
        # Save the model
        self.model.save(model_path)
        print(f"Model saved to {model_path}")

        # Save the scaler
        joblib.dump(scaler, scaler_path)
        print(f"Scaler saved to {scaler_path}")

    @staticmethod
    def load(model_path, scaler_path):
        # Load the model
        model = load_model(model_path)
        print(f"Model loaded from {model_path}")

        # Load the scaler
        scaler = joblib.load(scaler_path)
        print(f"Scaler loaded from {scaler_path}")

        return model, scaler

    def _validate_and_reshape(self, X, expected_shape):
        """
        Ensures input data has the correct shape for the model.
        
        Args:
            X: Input data to validate and reshape.
            expected_shape: Tuple representing the expected shape (e.g., (None, time_steps, features)).
        
        Returns:
            Reshaped data with the correct shape.
        """
        if len(X.shape) == 2:  # If shape is (num_samples, time_steps)
            reshaped = X[..., np.newaxis]  # Add a new axis for features
            print(f"Reshaped input from {X.shape} to {reshaped.shape}")
            return reshaped
        elif len(X.shape) == len(expected_shape):  # Correct shape
            print(f"Input shape is valid: {X.shape}")
            return X
        else:
            raise ValueError(f"Invalid input shape {X.shape}. Expected shape {expected_shape}.")


    




In [37]:
data_path_google = "inputs/google_stock_cleaned.csv"

output_file = "scaled_stock_data_close_option.pkl"





In [38]:
# Sample Usage
if __name__ == "__main__":
   
    # Scale data
    #scaler = MinMaxScaler()
    #y_train_scaled = scaler.fit_transform(y_train.reshape(-1, 1)).flatten()
    #y_val_scaled = scaler.transform(y_val.reshape(-1, 1)).flatten()

    # Initialize and train the predictor
    predictor = StockPricePredictor_Google(input_shape=(X_train.shape[1], X_train.shape[2]))
    history = predictor.train(X_train, y_train, batch_size=16, epochs=20, validation_data=(X_val, y_val))

    # Save model and scaler
    model_path = "stock_price_rnn_model.h5"
    
    predictor.save(model_path, scaler_path, scaler)

    # Load model and scaler for inference
    loaded_model, loaded_scaler = StockPricePredictor.load(model_path, scaler_path)
    predictions = loaded_model.predict(X_val)
    predictions_rescaled = loaded_scaler.inverse_transform(predictions)

    print("Predictions (rescaled):", predictions_rescaled[:5])

Input shape is valid: (554, 60, 1)


ValueError: Invalid input shape (0,). Expected shape (None, 60, 1).

In [41]:
# Example input data
input_shape = (60, 1)  # 60 time steps, 1 feature (e.g., close price)
predictor = StockPricePredictor_Google(input_shape=input_shape)

# Training
predictor.train(X_train, y_train, validation_data=(X_test, y_test))

# Prediction
predictions = predictor.predict(X_test)

Input shape is valid: (554, 60, 1)
Input shape is valid: (139, 60, 1)
Epoch 1/50
35/35 - 5s - 131ms/step - loss: 0.0106 - mean_absolute_error: 0.0675 - val_loss: 0.0038 - val_mean_absolute_error: 0.0557
Epoch 2/50
35/35 - 1s - 42ms/step - loss: 0.0025 - mean_absolute_error: 0.0377 - val_loss: 0.0014 - val_mean_absolute_error: 0.0275
Epoch 3/50
35/35 - 1s - 38ms/step - loss: 0.0023 - mean_absolute_error: 0.0369 - val_loss: 0.0014 - val_mean_absolute_error: 0.0308
Epoch 4/50
35/35 - 1s - 38ms/step - loss: 0.0020 - mean_absolute_error: 0.0347 - val_loss: 0.0045 - val_mean_absolute_error: 0.0621
Epoch 5/50
35/35 - 1s - 39ms/step - loss: 0.0017 - mean_absolute_error: 0.0305 - val_loss: 9.3528e-04 - val_mean_absolute_error: 0.0244
Epoch 6/50
35/35 - 1s - 41ms/step - loss: 0.0024 - mean_absolute_error: 0.0369 - val_loss: 0.0012 - val_mean_absolute_error: 0.0287
Epoch 7/50
35/35 - 1s - 38ms/step - loss: 0.0018 - mean_absolute_error: 0.0322 - val_loss: 0.0041 - val_mean_absolute_error: 0.0589
E

In [44]:
if __name__ == "__main__":
    # Example input data
    input_shape = (60, 1)  # 60 time steps, 1 feature (e.g., close price)
    predictor = StockPricePredictor_Google(input_shape=input_shape)

    # Training
    predictor.train(X_train, y_train, validation_data=(X_test, y_test))

    # Prediction
    predictions = predictor.predict(X_test)

    # Save model and scaler
    model_path = "outputs/stock_price_rnn_model.h5"
    scaler_path = "outputs/google_scale.pkl"
    
    predictor.save(model_path, scaler_path, scaler)

    # Load model and scaler for inference
    loaded_model, loaded_scaler = StockPricePredictor.load(model_path, scaler_path)
    predictions = loaded_model.predict(X_val)
    predictions_rescaled = loaded_scaler.inverse_transform(predictions)

    print("Predictions (rescaled):", predictions_rescaled)

    

Input shape is valid: (554, 60, 1)
Input shape is valid: (139, 60, 1)
Epoch 1/50
35/35 - 4s - 116ms/step - loss: 0.0115 - mean_absolute_error: 0.0670 - val_loss: 9.0537e-04 - val_mean_absolute_error: 0.0230
Epoch 2/50
35/35 - 1s - 40ms/step - loss: 0.0024 - mean_absolute_error: 0.0380 - val_loss: 0.0041 - val_mean_absolute_error: 0.0588
Epoch 3/50
35/35 - 1s - 39ms/step - loss: 0.0022 - mean_absolute_error: 0.0358 - val_loss: 0.0081 - val_mean_absolute_error: 0.0853
Epoch 4/50
35/35 - 1s - 39ms/step - loss: 0.0020 - mean_absolute_error: 0.0350 - val_loss: 7.5118e-04 - val_mean_absolute_error: 0.0204
Epoch 5/50
35/35 - 1s - 39ms/step - loss: 0.0022 - mean_absolute_error: 0.0355 - val_loss: 7.4067e-04 - val_mean_absolute_error: 0.0199
Epoch 6/50
35/35 - 1s - 40ms/step - loss: 0.0016 - mean_absolute_error: 0.0319 - val_loss: 0.0025 - val_mean_absolute_error: 0.0435
Epoch 7/50
35/35 - 1s - 39ms/step - loss: 0.0019 - mean_absolute_error: 0.0337 - val_loss: 0.0012 - val_mean_absolute_error: 

NameError: name 'scaler' is not defined