<a href="https://colab.research.google.com/github/arzhrd/Stock_Market_Prediction_5Mintes/blob/main/Grow_market__Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install required libraries in Colab
!pip install yfinance pandas numpy scikit-learn tensorflow

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
import yfinance as yf
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# Step 1: Load and preprocess data
def load_data(ticker='TATASTEEL.NS', period='5d', interval='5m'):
    """Download price bars with yfinance and append technical indicators.

    Columns added, in order: SMA5, RSI, MACD, Signal, BB_Middle, BB_Std,
    BB_Upper and BB_Lower. Rows containing any NaN (e.g. the indicator
    warm-up rows) are dropped before returning.

    Args:
        ticker: Symbol passed to ``yf.download``.
        period: Download period passed to ``yf.download``.
        interval: Bar interval passed to ``yf.download``.

    Returns:
        The downloaded frame with the indicator columns, NaN rows removed.
    """
    prices = yf.download(ticker, period=period, interval=interval, auto_adjust=False)
    print(f"Initial data shape: {prices.shape}")

    close = prices['Close']

    # Momentum / trend indicators derived from the close.
    prices['SMA5'] = close.rolling(window=5).mean()
    prices['RSI'] = compute_rsi(close, 14)
    macd_line, signal_line = compute_macd(close)
    prices['MACD'] = macd_line
    prices['Signal'] = signal_line

    # Bollinger Bands: 20-bar mean with bands two standard deviations away.
    window20 = close.rolling(window=20)
    prices['BB_Middle'] = window20.mean()
    prices['BB_Std'] = window20.std()
    band_width = 2 * prices['BB_Std']
    prices['BB_Upper'] = prices['BB_Middle'] + band_width
    prices['BB_Lower'] = prices['BB_Middle'] - band_width

    cleaned = prices.dropna()
    print(f"Data shape after dropping NaNs: {cleaned.shape}")
    return cleaned

def compute_rsi(data, periods=14):
    """Return the Relative Strength Index of a price series.

    Gains and losses are averaged with a simple rolling mean over
    ``periods`` price changes.

    Args:
        data: pandas Series (or DataFrame) of prices.
        periods: Number of consecutive price changes in each window.

    Returns:
        An object shaped like ``data`` holding RSI values in [0, 100].
        The first ``periods`` entries are NaN because a full window of
        real price changes is not yet available. A window with no price
        movement at all yields the neutral value 50 instead of NaN.
    """
    delta = data.diff()

    # clip() keeps NaN deltas (notably the first one produced by diff()),
    # so a window only yields a value once it holds ``periods`` real
    # changes rather than counting a missing change as "no change".
    gain = delta.clip(lower=0).rolling(window=periods).mean()
    loss = (-delta).clip(lower=0).rolling(window=periods).mean()

    # loss == 0 with gain > 0 gives rs = inf, which correctly maps to 100.
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))

    # gain == loss == 0 is 0/0 -> NaN; without this, a later dropna()
    # would silently remove flat stretches and leave gaps in the series.
    flat = (gain == 0) & (loss == 0)
    return rsi.mask(flat, 50.0)

def compute_macd(data, slow=26, fast=12, signal=9):
    """Return the MACD line and its signal line for a price series.

    Args:
        data: pandas object of prices supporting ``ewm``.
        slow: Span of the slow exponential moving average.
        fast: Span of the fast exponential moving average.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd, signal_line)`` where ``macd`` is the fast EMA minus
        the slow EMA and ``signal_line`` is the EMA of ``macd``.
    """
    def _ema(values, span):
        # Recursive (adjust=False) exponential moving average.
        return values.ewm(span=span, adjust=False).mean()

    macd_line = _ema(data, fast) - _ema(data, slow)
    return macd_line, _ema(macd_line, signal)

# Step 2: Prepare data for LSTM
def prepare_data(df, lookback=60):
    """Scale the feature columns and build sliding-window LSTM samples.

    Each sample is ``lookback`` consecutive rows of the scaled features;
    its target is the scaled 'Close' (feature column 0) of the row that
    immediately follows the window. Samples are split chronologically,
    the first 80% for training and the remainder for testing.

    The scaler is fitted only on the rows visible to the training samples
    (their windows and targets). Fitting on the whole dataset would leak
    the test period's min/max into training. As a consequence, scaled
    test values may fall slightly outside [0, 1].

    Args:
        df: Frame containing the columns listed in ``features`` below.
        lookback: Number of rows in each input window.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, scaler, columns)`` where
        ``scaler`` is the fitted MinMaxScaler and ``columns`` is the column
        index of the selected feature frame.

    Raises:
        ValueError: If there are too few rows to build at least one
            training sample.
    """
    if len(df) < lookback:
        raise ValueError(f"Dataset too small: {len(df)} rows, need at least {lookback} after NaN removal")

    features = ['Close', 'SMA5', 'RSI', 'MACD', 'Signal', 'BB_Upper', 'BB_Lower', 'BB_Middle']
    feature_frame = df[features]

    # Work out the split before scaling so the scaler never sees test rows.
    n_samples = len(feature_frame) - lookback
    if n_samples == 0:
        raise ValueError("No samples generated. Increase data period or reduce lookback.")

    train_size = int(n_samples * 0.8)
    if train_size == 0:
        raise ValueError("Not enough samples for training. Need more data.")

    # The last training sample's target is row (lookback + train_size - 1),
    # so exactly the first (lookback + train_size) rows belong to training.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(feature_frame.iloc[:lookback + train_size])
    scaled_data = scaler.transform(feature_frame)
    print(f"Scaled data shape: {scaled_data.shape}")

    X = np.array([scaled_data[i - lookback:i] for i in range(lookback, len(scaled_data))])
    y = scaled_data[lookback:, 0].copy()
    print(f"X shape: {X.shape}, y shape: {y.shape}")

    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test, scaler, feature_frame.columns

# Step 3: Build and train LSTM model
def build_model(lookback, n_features):
    """Build and compile the stacked-LSTM regression model.

    Args:
        lookback: Number of time steps in each input window.
        n_features: Number of features per time step.

    Returns:
        A compiled Keras ``Sequential`` model with a single output unit,
        using the Adam optimizer and mean-squared-error loss.
    """
    layer_stack = [
        Input(shape=(lookback, n_features)),
        # First LSTM returns the full sequence so the second can consume it.
        LSTM(units=50, return_sequences=True),
        Dropout(0.2),
        LSTM(units=50),
        Dropout(0.2),
        # Single scalar output: the scaled next-step target.
        Dense(units=1),
    ]
    net = Sequential(layer_stack)
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net

# Step 4: Evaluate model accuracy
def evaluate_model(model, X_test, y_test, scaler, feature_columns):
    """Score the model on the test set in original price units.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Test input windows passed to ``model.predict``.
        y_test: 1-D array of scaled targets for the test windows.
        scaler: Object exposing ``inverse_transform``; it is called with
            ``len(feature_columns)`` columns.
        feature_columns: Feature columns; only their count is used here.

    Returns:
        Tuple ``(rmse, mape, accuracy_percentage, y_pred, y_actual)``.
        ``mape`` and ``accuracy_percentage`` are percentages; the accuracy
        is the share of predictions within 1% of the actual value.
    """
    n_padding = len(feature_columns) - 1

    def to_price(scaled_column):
        # Pad the single target column with zeros up to the full feature
        # count for inverse_transform, then keep only column 0.
        filler = np.zeros((scaled_column.shape[0], n_padding))
        return scaler.inverse_transform(np.hstack([scaled_column, filler]))[:, 0]

    y_pred_scaled = model.predict(X_test)
    y_pred = to_price(y_pred_scaled)
    y_actual = to_price(y_test.reshape(-1, 1))

    # Error metrics in price units (RMSE) and percent (MAPE).
    rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
    mape = mean_absolute_percentage_error(y_actual, y_pred) * 100

    # Share of predictions whose relative error is within the tolerance.
    tolerance = 0.01
    relative_error = np.abs((y_pred - y_actual) / y_actual)
    hits = np.sum(relative_error <= tolerance)
    accuracy_percentage = (hits / len(y_actual)) * 100

    return rmse, mape, accuracy_percentage, y_pred, y_actual

# Step 5: Main execution
# Window length shared by sample preparation, the model's input shape and
# the final prediction window. These three must agree, so it is defined once.
LOOKBACK = 60

try:
    df = load_data(period='5d')
    X_train, X_test, y_train, y_test, scaler, feature_columns = prepare_data(df, lookback=LOOKBACK)
    model = build_model(lookback=LOOKBACK, n_features=len(feature_columns))
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

    # Evaluate model
    rmse, mape, accuracy_percentage, y_pred, y_actual = evaluate_model(model, X_test, y_test, scaler, feature_columns)
    print(f"Model RMSE: {rmse:.2f} INR")
    print(f"Model MAPE: {mape:.2f}%")
    print(f"Prediction Accuracy (within ±1%): {accuracy_percentage:.2f}%")

    # Step 6: Predict next 5-minute price
    # The most recent LOOKBACK rows form the single input window.
    last_window = df[feature_columns].tail(LOOKBACK)
    scaled_last_window = scaler.transform(last_window)
    X_pred = np.array([scaled_last_window])
    print(f"X_pred shape: {X_pred.shape}")
    pred_scaled = model.predict(X_pred)
    # Pad the predicted column with zeros up to the full feature count for
    # inverse_transform; only column 0 (the first feature) is read back.
    pred_price = scaler.inverse_transform(
        np.hstack([pred_scaled, np.zeros((pred_scaled.shape[0], len(feature_columns)-1))])
    )[0, 0]

    print(f"Predicted Tata Steel price in 5 minutes: {pred_price:.2f} INR")

except ValueError as e:
    # prepare_data raises ValueError when there is too little data.
    print(f"Error: {e}")



[*********************100%***********************]  1 of 1 completed

Initial data shape: (342, 6)
Data shape after dropping NaNs: (323, 14)
Scaled data shape: (323, 8)
X shape: (263, 60, 8), y shape: (263,)
X_train shape: (210, 60, 8), y_train shape: (210,)
X_test shape: (53, 60, 8), y_test shape: (53,)
Epoch 1/50





[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 55ms/step - loss: 0.1300
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/step - loss: 0.0347
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - loss: 0.0249
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 0.0159
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/step - loss: 0.0220
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - loss: 0.0185
Epoch 7/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - loss: 0.0132
Epoch 8/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - loss: 0.0169
Epoch 9/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - loss: 0.0126
Epoch 10/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0152
Epoch 11/50
[1m7/7[0m [32m━