In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import joblib
import os
import warnings
warnings.filterwarnings('ignore')

class StockPredictor:
    """LSTM-based forecaster for the next value of a stock's closing price.

    Intended call order: ``fetch_data`` -> ``prepare_data`` -> ``build_model``
    -> ``train_model`` -> ``evaluate_model`` / ``predict_future`` ->
    ``save_model``. ``load_model`` restores a previously saved model, scaler
    and parameters.
    """

    # Columns used when prepare_data() is called without an explicit list.
    # Column 0 of the feature matrix is the prediction target.
    DEFAULT_FEATURES = ('Close', 'Volume', 'MA_5', 'MA_20', 'MA_50', 'RSI')

    def __init__(self, ticker_symbol, sequence_length=60, test_size=0.2):
        """
        Initialize the Stock Predictor

        Args:
            ticker_symbol (str): Stock ticker symbol (e.g., 'AAPL', 'GOOGL')
            sequence_length (int): Number of days to look back for prediction
            test_size (float): Proportion of data to use for testing

        Raises:
            ValueError: If sequence_length < 1 or test_size is outside [0, 1).
        """
        if sequence_length < 1:
            raise ValueError("sequence_length must be at least 1")
        if not 0 <= test_size < 1:
            raise ValueError("test_size must satisfy 0 <= test_size < 1")

        self.ticker = ticker_symbol
        self.sequence_length = sequence_length
        self.test_size = test_size
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.model = None
        self.data = None
        self.scaled_data = None
        self.features = None  # feature columns used by the last prepare_data()
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def fetch_data(self, period="2y"):
        """
        Fetch stock data from Yahoo Finance and add technical indicators

        Adds the columns MA_5, MA_20, MA_50, RSI and Volume_MA, then drops
        every row that contains a NaN (the indicator warm-up rows).

        Args:
            period (str): Time period for data ('1y', '2y', '5y', 'max')

        Raises:
            ValueError: If the download returns no rows.
        """
        print(f"Fetching data for {self.ticker}...")
        self.data = yf.download(self.ticker, period=period)

        if self.data is None or self.data.empty:
            raise ValueError(f"No data found for ticker {self.ticker}")

        # Add technical indicators
        self.data['MA_5'] = self.data['Close'].rolling(window=5).mean()
        self.data['MA_20'] = self.data['Close'].rolling(window=20).mean()
        self.data['MA_50'] = self.data['Close'].rolling(window=50).mean()
        self.data['RSI'] = self._calculate_rsi(self.data['Close'])
        self.data['Volume_MA'] = self.data['Volume'].rolling(window=20).mean()

        # Remove NaN values
        self.data = self.data.dropna()
        print(f"Data fetched successfully. Shape: {self.data.shape}")

    def _calculate_rsi(self, prices, window=14):
        """
        Calculate the Relative Strength Index from simple rolling means

        Args:
            prices: pandas object of prices (anything supporting diff/where/rolling)
            window (int): Rolling window length for average gain and loss

        Returns:
            RSI values in the same container type as ``prices``. The first
            ``window - 1`` entries are NaN; a window with no losses yields 100,
            and a window with neither gains nor losses yields NaN.
        """
        delta = prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def _inverse_close(self, scaled_values):
        """
        Map scaled target values (feature column 0) back to original units

        The scaler was fitted on all feature columns, so the values are padded
        with zero columns to the width the scaler expects and only column 0 of
        the inverse-transformed result is kept.

        Args:
            scaled_values: Array-like of scaled values, any shape; flattened.

        Returns:
            numpy.ndarray: 1-D array of values in original units.
        """
        column = np.asarray(scaled_values, dtype=float).reshape(-1, 1)
        n_features = self.scaler.scale_.shape[0]
        padding = np.zeros((len(column), n_features - 1))
        padded = np.concatenate([column, padding], axis=1)
        return self.scaler.inverse_transform(padded)[:, 0]

    def prepare_data(self, features=None, fit_scaler=True):
        """
        Prepare data for training

        Scales the selected feature columns, builds sliding windows of
        ``sequence_length`` rows and splits them chronologically into train
        and test sets. The target of each window is column 0 of the row that
        follows it.

        Args:
            features (list): List of features to use for prediction. ``None``
                selects ``DEFAULT_FEATURES``. The first entry is the target.
            fit_scaler (bool): When True (default) the scaler is fitted on the
                rows that feed the training windows only, so statistics of the
                test period do not leak into training. Pass False to reuse an
                already fitted scaler (e.g. one restored by ``load_model``).

        Raises:
            RuntimeError: If no data has been fetched yet.
            ValueError: If there are too few rows to build at least one
                training window.
        """
        if self.data is None:
            raise RuntimeError("No data loaded; call fetch_data() first")

        print("Preparing data...")

        # Copy so a caller-supplied list (or the class default) is never shared
        features = list(self.DEFAULT_FEATURES if features is None else features)
        self.features = features

        # Select features
        feature_data = self.data[features].values

        seq_len = self.sequence_length
        n_sequences = len(feature_data) - seq_len
        if n_sequences < 1:
            raise ValueError(
                f"Need more than {seq_len} rows to build sequences, "
                f"got {len(feature_data)}"
            )
        split_idx = int(n_sequences * (1 - self.test_size))
        if split_idx < 1:
            raise ValueError("test_size leaves no samples for training")

        # Scale the data. Training windows and their targets cover rows
        # [0, seq_len + split_idx); fitting on exactly those rows keeps the
        # test period out of the scaler's min/max.
        if fit_scaler:
            self.scaler.fit(feature_data[:seq_len + split_idx])
        self.scaled_data = self.scaler.transform(feature_data)

        # Create sequences
        X = np.array([self.scaled_data[i - seq_len:i]
                      for i in range(seq_len, len(self.scaled_data))])
        y = self.scaled_data[seq_len:, 0].copy()  # first feature is the target

        # Split data
        self.X_train = X[:split_idx]
        self.X_test = X[split_idx:]
        self.y_train = y[:split_idx]
        self.y_test = y[split_idx:]

        print(f"Training data shape: X={self.X_train.shape}, y={self.y_train.shape}")
        print(f"Testing data shape: X={self.X_test.shape}, y={self.y_test.shape}")

    def build_model(self, lstm_units=None, dropout_rate=0.2):
        """
        Build LSTM model

        Args:
            lstm_units (list): Number of units in each LSTM layer; ``None``
                selects ``[50, 50]``. A single-element list is supported.
            dropout_rate (float): Dropout rate for regularization

        Raises:
            RuntimeError: If prepare_data() has not been called.
            ValueError: If lstm_units is empty.
        """
        if self.X_train is None:
            raise RuntimeError("No training data; call prepare_data() first")

        lstm_units = [50, 50] if lstm_units is None else list(lstm_units)
        if not lstm_units:
            raise ValueError("lstm_units must contain at least one layer size")

        print("Building model...")

        self.model = Sequential()

        # Stack the LSTM layers. Only the last one collapses the time axis;
        # every earlier layer must return the full sequence for the next LSTM.
        last_layer = len(lstm_units) - 1
        for i, units in enumerate(lstm_units):
            layer_kwargs = {'units': units, 'return_sequences': i < last_layer}
            if i == 0:
                layer_kwargs['input_shape'] = (self.X_train.shape[1],
                                               self.X_train.shape[2])
            self.model.add(LSTM(**layer_kwargs))
            self.model.add(Dropout(dropout_rate))

        # Dense layer
        self.model.add(Dense(units=25))
        self.model.add(Dense(units=1))

        # Compile model
        self.model.compile(optimizer=Adam(learning_rate=0.001),
                           loss='mean_squared_error',
                           metrics=['mae'])

        print("Model built successfully!")
        # summary() prints the table itself and returns None, so it is not
        # wrapped in print().
        self.model.summary()

    def train_model(self, epochs=100, batch_size=32, validation_split=0.1):
        """
        Train the model

        Stops early when val_loss has not improved for 15 epochs (restoring
        the best weights) and checkpoints the best model to
        '<ticker>_best_model.h5'.

        Args:
            epochs (int): Number of training epochs
            batch_size (int): Batch size for training
            validation_split (float): Proportion of training data to use for validation

        Returns:
            The history object returned by ``model.fit``.

        Raises:
            RuntimeError: If the model or the training data is missing.
        """
        if self.model is None:
            raise RuntimeError("No model; call build_model() first")
        if self.X_train is None:
            raise RuntimeError("No training data; call prepare_data() first")

        print("Training model...")

        # Callbacks
        early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(f'{self.ticker}_best_model.h5',
                                           monitor='val_loss',
                                           save_best_only=True)

        # Train model
        history = self.model.fit(
            self.X_train, self.y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stopping, model_checkpoint],
            verbose=1
        )

        print("Model training completed!")
        return history

    def evaluate_model(self):
        """
        Evaluate model performance on the train and test splits

        Predictions and targets are converted back to original units before
        the metrics are computed.

        Returns:
            dict: Keys 'train_rmse', 'test_rmse', 'train_mae', 'test_mae',
            'train_r2', 'test_r2'.

        Raises:
            RuntimeError: If the model or the prepared data is missing.
        """
        if self.model is None:
            raise RuntimeError("No model; call build_model() or load_model() first")
        if self.X_train is None or self.X_test is None:
            raise RuntimeError("No prepared data; call prepare_data() first")

        print("Evaluating model...")

        # Make predictions and inverse transform them
        train_pred_original = self._inverse_close(self.model.predict(self.X_train))
        test_pred_original = self._inverse_close(self.model.predict(self.X_test))

        # Inverse transform actual values
        train_actual_original = self._inverse_close(self.y_train)
        test_actual_original = self._inverse_close(self.y_test)

        # Calculate metrics
        train_rmse = np.sqrt(mean_squared_error(train_actual_original, train_pred_original))
        test_rmse = np.sqrt(mean_squared_error(test_actual_original, test_pred_original))
        train_mae = mean_absolute_error(train_actual_original, train_pred_original)
        test_mae = mean_absolute_error(test_actual_original, test_pred_original)
        train_r2 = r2_score(train_actual_original, train_pred_original)
        test_r2 = r2_score(test_actual_original, test_pred_original)

        metrics = {
            'train_rmse': train_rmse,
            'test_rmse': test_rmse,
            'train_mae': train_mae,
            'test_mae': test_mae,
            'train_r2': train_r2,
            'test_r2': test_r2
        }

        print("Model Evaluation Metrics:")
        print(f"Training RMSE: {train_rmse:.4f}")
        print(f"Testing RMSE: {test_rmse:.4f}")
        print(f"Training MAE: {train_mae:.4f}")
        print(f"Testing MAE: {test_mae:.4f}")
        print(f"Training R²: {train_r2:.4f}")
        print(f"Testing R²: {test_r2:.4f}")

        return metrics

    def predict_future(self, days=10):
        """
        Predict future stock prices

        Forecasts recursively: each prediction is written into column 0 of a
        copy of the latest row and appended to the window, while all other
        feature columns are held at their last observed values.

        Args:
            days (int): Number of days to predict

        Returns:
            numpy.ndarray: ``days`` predicted values in original units.

        Raises:
            RuntimeError: If the model or the scaled data is missing.
            ValueError: If days < 1 or fewer than ``sequence_length`` scaled
                rows are available.
        """
        if self.model is None:
            raise RuntimeError("No model; call build_model() or load_model() first")
        if self.scaled_data is None:
            raise RuntimeError("No scaled data; call prepare_data() first")
        if days < 1:
            raise ValueError("days must be a positive integer")
        if len(self.scaled_data) < self.sequence_length:
            raise ValueError(
                f"Need at least {self.sequence_length} scaled rows, "
                f"got {len(self.scaled_data)}"
            )

        print(f"Predicting next {days} days...")

        # Get last sequence from scaled data
        window = self.scaled_data[-self.sequence_length:].copy()
        n_features = window.shape[1]
        scaled_predictions = []

        for _ in range(days):
            # Reshape for prediction
            pred_input = window.reshape(1, self.sequence_length, n_features)

            # Make prediction
            next_close = self.model.predict(pred_input, verbose=0)[0, 0]
            scaled_predictions.append(next_close)

            # Update sequence (assume other features remain constant)
            new_row = window[-1].copy()
            new_row[0] = next_close

            # Shift sequence
            window = np.vstack([window[1:], new_row])

        # Inverse transform predictions
        return self._inverse_close(scaled_predictions)

    def save_model(self, model_path=None, scaler_path=None):
        """
        Save model, scaler and parameters

        Args:
            model_path (str): Target for the model; defaults to
                '<ticker>_stock_model.h5'.
            scaler_path (str): Target for the scaler; defaults to
                '<ticker>_scaler.pkl'.

        The parameters are always written to '<ticker>_params.pkl'.

        Raises:
            RuntimeError: If there is no model to save.
        """
        if self.model is None:
            raise RuntimeError("No model to save; call build_model() first")

        if model_path is None:
            model_path = f'{self.ticker}_stock_model.h5'
        if scaler_path is None:
            scaler_path = f'{self.ticker}_scaler.pkl'

        self.model.save(model_path)
        joblib.dump(self.scaler, scaler_path)

        # Save model parameters
        params = {
            'ticker': self.ticker,
            'sequence_length': self.sequence_length,
            'test_size': self.test_size,
            'data_shape': self.scaled_data.shape if self.scaled_data is not None else None,
            'features': self.features
        }
        joblib.dump(params, f'{self.ticker}_params.pkl')

        print(f"Model saved to {model_path}")
        print(f"Scaler saved to {scaler_path}")
        print(f"Parameters saved to {self.ticker}_params.pkl")

    def load_model(self, model_path=None, scaler_path=None):
        """
        Load model, scaler and parameters

        Args:
            model_path (str): Model file; defaults to '<ticker>_stock_model.h5'.
            scaler_path (str): Scaler file; defaults to '<ticker>_scaler.pkl'.

        The parameters are always read from '<ticker>_params.pkl'.
        """
        if model_path is None:
            model_path = f'{self.ticker}_stock_model.h5'
        if scaler_path is None:
            scaler_path = f'{self.ticker}_scaler.pkl'

        # Inside a method body this name resolves to the module-level Keras
        # load_model function, not to this method.
        self.model = load_model(model_path)
        # NOTE: joblib.load unpickles arbitrary objects; only load files from
        # a trusted source.
        self.scaler = joblib.load(scaler_path)

        # Load parameters
        params = joblib.load(f'{self.ticker}_params.pkl')
        self.ticker = params['ticker']
        self.sequence_length = params['sequence_length']
        self.test_size = params['test_size']
        # Older parameter files have no 'features' entry
        self.features = params.get('features')

        print(f"Model loaded from {model_path}")
        print(f"Scaler loaded from {scaler_path}")

# Example usage and training script
def train_stock_model(ticker_symbol, save_model=True, period="2y", epochs=100,
                      batch_size=32, days=10):
    """
    Complete training pipeline

    Fetches data, prepares it, builds and trains a two-layer LSTM
    (100 and 50 units, dropout 0.2), evaluates it, prints a forecast and
    optionally saves the model.

    Args:
        ticker_symbol (str): Stock ticker symbol
        save_model (bool): Whether to save the trained model
        period (str): History window to download (e.g. '1y', '2y', '5y', 'max')
        epochs (int): Maximum number of training epochs
        batch_size (int): Batch size for training
        days (int): Number of future days to forecast

    Returns:
        tuple: (predictor, history, metrics, future_predictions)
    """
    # Initialize predictor
    predictor = StockPredictor(ticker_symbol)

    # Fetch and prepare data
    predictor.fetch_data(period=period)
    predictor.prepare_data()

    # Build and train model
    predictor.build_model(lstm_units=[100, 50], dropout_rate=0.2)
    history = predictor.train_model(epochs=epochs, batch_size=batch_size)

    # Evaluate model
    metrics = predictor.evaluate_model()

    # Make future predictions
    future_predictions = predictor.predict_future(days=days)

    # The banner reports the requested horizon instead of a fixed "10"
    print(f"\nFuture Predictions (Next {days} days):")
    for day, price in enumerate(future_predictions, 1):
        print(f"Day {day}: ${price:.2f}")

    # Save model if requested
    if save_model:
        predictor.save_model()

    return predictor, history, metrics, future_predictions

# Function to load and use saved model
def load_and_predict(ticker_symbol, days=10):
    """
    Load saved model and make predictions

    Args:
        ticker_symbol (str): Stock ticker symbol
        days (int): Number of days to predict

    Returns:
        Array of ``days`` predicted prices in original units.

    Raises:
        ValueError: If ``days`` is less than 1, or the downloaded history is
            shorter than the model's look-back window.
    """
    # Reject a bad horizon before loading the model or downloading anything
    if days < 1:
        raise ValueError("days must be a positive integer")

    predictor = StockPredictor(ticker_symbol)
    predictor.load_model()

    # Fetch recent data for prediction
    predictor.fetch_data(period="1y")

    # Scale the fresh data with the scaler restored by load_model().
    # prepare_data() is deliberately not called here: by default it re-fits
    # the scaler on the newly downloaded window, which would feed the model
    # inputs on a different scale than the one it was trained on.
    # The feature list must match the one used at training time (this is the
    # default list of StockPredictor.prepare_data).
    features = ['Close', 'Volume', 'MA_5', 'MA_20', 'MA_50', 'RSI']
    feature_data = predictor.data[features].values
    if len(feature_data) < predictor.sequence_length:
        raise ValueError(
            f"Need at least {predictor.sequence_length} rows of history, "
            f"got {len(feature_data)}"
        )
    predictor.scaled_data = predictor.scaler.transform(feature_data)

    # Make predictions
    predictions = predictor.predict_future(days=days)

    print(f"\nPredictions for {ticker_symbol} (Next {days} days):")
    for day, price in enumerate(predictions, 1):
        print(f"Day {day}: ${price:.2f}")

    return predictions

# Script entry point: runs the full training pipeline once for a sample ticker.
# The names assigned here become module-level (notebook) globals.
if __name__ == "__main__":
    # Example: Train model for Apple stock
    ticker = "AAPL"
    # Returns the fitted predictor, the fit history, the evaluation metrics
    # and the forecast; save_model=True also writes the model files to disk.
    predictor, history, metrics, predictions = train_stock_model(ticker, save_model=True)
    
    print(f"\nTraining completed for {ticker}")
    print("Model files saved and ready for use in another notebook!")

Fetching data for AAPL...
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Data fetched successfully. Shape: (451, 10)
Preparing data...
Training data shape: X=(312, 60, 6), y=(312,)
Testing data shape: X=(79, 60, 6), y=(79,)
Building model...
Model built successfully!


None
Training model...
Epoch 1/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.1334 - mae: 0.2728



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 187ms/step - loss: 0.1271 - mae: 0.2651 - val_loss: 0.0160 - val_mae: 0.1078
Epoch 2/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0218 - mae: 0.1173



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step - loss: 0.0217 - mae: 0.1171 - val_loss: 0.0119 - val_mae: 0.0890
Epoch 3/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.0132 - mae: 0.0986



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - loss: 0.0132 - mae: 0.0984 - val_loss: 0.0062 - val_mae: 0.0627
Epoch 4/100
[1m8/9[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 60ms/step - loss: 0.0123 - mae: 0.0867



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step - loss: 0.0122 - mae: 0.0860 - val_loss: 0.0057 - val_mae: 0.0612
Epoch 5/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - loss: 0.0093 - mae: 0.0774 - val_loss: 0.0066 - val_mae: 0.0682
Epoch 6/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - loss: 0.0073 - mae: 0.0681 - val_loss: 0.0070 - val_mae: 0.0705
Epoch 7/100
[1m8/9[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 61ms/step - loss: 0.0074 - mae: 0.0710



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step - loss: 0.0074 - mae: 0.0707 - val_loss: 0.0053 - val_mae: 0.0626
Epoch 8/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - loss: 0.0071 - mae: 0.0662 - val_loss: 0.0104 - val_mae: 0.0864
Epoch 9/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 0.0061 - mae: 0.0620 - val_loss: 0.0060 - val_mae: 0.0657
Epoch 10/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - loss: 0.0057 - mae: 0.0597 - val_loss: 0.0055 - val_mae: 0.0637
Epoch 11/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 0.0076 - mae: 0.0690



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step - loss: 0.0076 - mae: 0.0688 - val_loss: 0.0051 - val_mae: 0.0608
Epoch 12/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.0066 - mae: 0.0639 - val_loss: 0.0065 - val_mae: 0.0683
Epoch 13/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - loss: 0.0063 - mae: 0.0626 - val_loss: 0.0054 - val_mae: 0.0627
Epoch 14/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step - loss: 0.0058 - mae: 0.0604 - val_loss: 0.0100 - val_mae: 0.0868
Epoch 15/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - loss: 0.0059 - mae: 0.0608 - val_loss: 0.0112 - val_mae: 0.0916
Epoch 16/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - loss: 0.0056 - mae: 0.0580 - val_loss: 0.0059 - val_mae: 0.0598
Epoch 17/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - loss: 0.0067 - mae: 



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 80ms/step - loss: 0.0048 - mae: 0.0558 - val_loss: 0.0047 - val_mae: 0.0577
Epoch 24/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - loss: 0.0052 - mae: 0.0580 - val_loss: 0.0065 - val_mae: 0.0702
Epoch 25/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.0042 - mae: 0.0498 - val_loss: 0.0057 - val_mae: 0.0655
Epoch 26/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/step - loss: 0.0043 - mae: 0.0515 - val_loss: 0.0069 - val_mae: 0.0726
Epoch 27/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - loss: 0.0040 - mae: 0.0491 - val_loss: 0.0065 - val_mae: 0.0709
Epoch 28/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - loss: 0.0047 - mae: 0.0537 - val_loss: 0.0073 - val_mae: 0.0753
Epoch 29/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - loss: 0.0043 - mae: 



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - loss: 0.0051 - mae: 0.0558 - val_loss: 0.0046 - val_mae: 0.0582
Epoch 33/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 70ms/step - loss: 0.0036 - mae: 0.0477 - val_loss: 0.0066 - val_mae: 0.0704
Epoch 34/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.0038 - mae: 0.0472 - val_loss: 0.0133 - val_mae: 0.1006
Epoch 35/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - loss: 0.0044 - mae: 0.0518 - val_loss: 0.0135 - val_mae: 0.1020
Epoch 36/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - loss: 0.0048 - mae: 0.0513 - val_loss: 0.0132 - val_mae: 0.1001
Epoch 37/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step - loss: 0.0044 - mae: 0.0502 - val_loss: 0.0074 - val_mae: 0.0750
Epoch 38/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - loss: 0.0043 - mae: 




Future Predictions (Next 10 days):
Day 1: $197.30
Day 2: $197.55
Day 3: $197.79
Day 4: $198.01
Day 5: $198.19
Day 6: $198.33
Day 7: $198.45
Day 8: $198.54
Day 9: $198.61
Day 10: $198.67
Model saved to AAPL_stock_model.h5
Scaler saved to AAPL_scaler.pkl
Parameters saved to AAPL_params.pkl

Training completed for AAPL
Model files saved and ready for use in another notebook!
