In [6]:
import pandas as pd
import numpy as np
import yfinance as yf
from pprint import pprint


In [7]:
def stock_data(ticker):
    stock = yf.Ticker(ticker)
    stock_prices = stock.history(period='5y', interval='1d')
    return stock_prices

In [8]:
df = stock_data('INFY.NS')
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-11-14 00:00:00+05:30,619.580269,631.547473,619.580269,627.543579,10539902,0.0,0.0
2019-11-15 00:00:00+05:30,630.880113,634.350176,624.874273,626.297913,7362623,0.0,0.0
2019-11-18 00:00:00+05:30,627.276567,631.725337,622.204959,627.454529,7490916,0.0,0.0
2019-11-19 00:00:00+05:30,631.636438,635.284408,627.276622,634.261169,6495885,0.0,0.0
2019-11-20 00:00:00+05:30,634.216814,636.174284,627.276741,634.394775,6142171,0.0,0.0
...,...,...,...,...,...,...,...
2024-11-08 00:00:00+05:30,1818.000000,1840.599976,1813.150024,1829.949951,4210960,0.0,0.0
2024-11-11 00:00:00+05:30,1829.000000,1868.000000,1822.550049,1860.099976,3804234,0.0,0.0
2024-11-12 00:00:00+05:30,1871.099976,1881.000000,1861.000000,1868.800049,5012450,0.0,0.0
2024-11-13 00:00:00+05:30,1861.099976,1873.199951,1856.300049,1868.400024,4257495,0.0,0.0


In [9]:
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits'], dtype='object')

In [10]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.layers import Bidirectional, Conv1D, MaxPooling1D, GlobalAveragePooling1D
from tensorflow.keras.layers import Add, Concatenate, LeakyReLU, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

class EnhancedStockPricePredictor:
    def __init__(self, sequence_length=10):
        self.sequence_length = sequence_length
        self.scaler = MinMaxScaler()
        self.model = None
        
    def prepare_data(self, df):
        # Enhanced feature engineering
        df['Returns'] = df['Close'].pct_change()
        df['Target'] = (df['Returns'].shift(-1) > 0).astype(int)
        
        # Technical indicators with multiple timeframes
        for window in [5, 10, 20, 50]:
            df[f'MA{window}'] = df['Close'].rolling(window=window).mean()
            df[f'Std{window}'] = df['Close'].rolling(window=window).std()
            df[f'Vol{window}'] = df['Volume'].rolling(window=window).mean()
        
        # Enhanced RSI calculation
        for timeperiod in [7, 14, 21]:
            delta = df['Close'].diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=timeperiod).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=timeperiod).mean()
            rs = gain / loss
            df[f'RSI{timeperiod}'] = 100 - (100 / (1 + rs))
        
        # MACD with different parameters
        for (fast, slow) in [(12, 26), (8, 21)]:
            exp1 = df['Close'].ewm(span=fast, adjust=False).mean()
            exp2 = df['Close'].ewm(span=slow, adjust=False).mean()
            df[f'MACD_{fast}_{slow}'] = exp1 - exp2
        
        # Momentum indicators
        df['MOM5'] = df['Close'].diff(5)
        df['MOM10'] = df['Close'].diff(10)
        
        # Volatility indicators
        df['ATR'] = (df['High'] - df['Low']).rolling(window=14).mean()
        
        # Drop any rows with NaN values
        df = df.dropna()
        
        # Select all numerical columns except 'Target' and 'Returns'
        feature_columns = df.select_dtypes(include=[np.number]).columns.difference(['Target', 'Returns'])
        
        # Scale the features
        scaled_features = self.scaler.fit_transform(df[feature_columns])
        
        # Create sequences with overlap
        X, y = [], []
        for i in range(len(df) - self.sequence_length):
            X.append(scaled_features[i:(i + self.sequence_length)])
            y.append(df['Target'].iloc[i + self.sequence_length])
            
        return np.array(X), np.array(y)
    
    def build_model(self, input_shape):
        # Input layer
        inputs = Input(shape=input_shape)
        
        # CNN Branch with same padding and no pooling
        conv1 = Conv1D(64, 3, padding='same')(inputs)
        conv1 = LeakyReLU()(conv1)
        conv1 = BatchNormalization()(conv1)
        
        conv2 = Conv1D(128, 3, padding='same')(conv1)
        conv2 = LeakyReLU()(conv2)
        conv2 = BatchNormalization()(conv2)
        
        # Bidirectional LSTM Branch
        lstm1 = Bidirectional(LSTM(64, return_sequences=True))(inputs)
        lstm1 = LayerNormalization()(lstm1)
        lstm1 = Dropout(0.3)(lstm1)
        
        lstm2 = Bidirectional(LSTM(64, return_sequences=True))(lstm1)
        lstm2 = LayerNormalization()(lstm2)
        lstm2 = Dropout(0.3)(lstm2)
        
        # Merge CNN and LSTM branches (now they have matching sequence lengths)
        merged = Concatenate()([conv2, lstm2])
        
        # Additional processing
        x = Bidirectional(LSTM(128, return_sequences=False))(merged)
        x = LayerNormalization()(x)
        x = Dropout(0.4)(x)
        
        # Dense layers with skip connections
        dense1 = Dense(128)(x)
        dense1 = LeakyReLU()(dense1)
        dense1 = BatchNormalization()(dense1)
        dense1 = Dropout(0.4)(dense1)
        
        dense2 = Dense(64)(dense1)
        dense2 = LeakyReLU()(dense2)
        dense2 = BatchNormalization()(dense2)
        dense2 = Dropout(0.3)(dense2)
        
        # Output layer
        outputs = Dense(1, activation='sigmoid')(dense2)
        
        # Create model
        model = Model(inputs=inputs, outputs=outputs)
        
        # Compile with custom Adam configuration
        optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
        model.compile(
            optimizer=optimizer,
            loss='binary_crossentropy',
            metrics=['accuracy']
        )
        
        self.model = model
        return model
        
    def train(self, X, y, validation_split=0.2, epochs=100, batch_size=32):
        # Enhanced callbacks
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            mode='min'
        )
        
        reduce_lr = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=5,
            min_lr=1e-6,
            mode='min',
            verbose=1
        )
        
        # Class weights to handle imbalanced data
        class_weights = {
            0: 1 / np.mean(y == 0),
            1: 1 / np.mean(y == 1)
        }
        
        history = self.model.fit(
            X, y,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stopping, reduce_lr],
            class_weight=class_weights,
            shuffle=True,
            verbose=1
        )
        
        return history
    
    def predict(self, X):
        return self.model.predict(X)

# Usage example:
def main():
    # Load your stock data
    df = stock_data('INFY.NS')
    
    # Initialize and prepare the model
    predictor = EnhancedStockPricePredictor(sequence_length=10)
    X, y = predictor.prepare_data(df)
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )
    
    # Build and train the model
    predictor.build_model(input_shape=(X.shape[1], X.shape[2]))
    history = predictor.train(X_train, y_train)
    
    # Evaluate the model
    test_loss, test_accuracy = predictor.model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_accuracy:.4f}")
    
    # Make predictions for the next day
    last_sequence = X_test[-1:]
    prediction = predictor.predict(last_sequence)
    print(f"Predicted movement for next day: {'Up' if prediction[0][0] > 0.5 else 'Down'}")
    print(f"Confidence: {prediction[0][0]:.4f}")

if __name__ == "__main__":
    main()

2024-11-14 10:01:52.000628: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2024-11-14 10:01:52.000660: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2024-11-14 10:01:52.000665: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2024-11-14 10:01:52.000732: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-14 10:01:52.000775: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/100


2024-11-14 10:01:57.975548: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2024-11-14 10:01:58.310946: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 20: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Test accuracy: 0.5105
Predicted movement for next day: Down
Confidence: 0.3572
