In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import concurrent.futures

#10+ companies ke stock tickers (Modify as needed)
tickers = ["AAPL", "TSLA", "GOOGL", "MSFT", "AMZN", "NVDA", "META", "JPM", "NFLX", "AMD", "IBM"]

#Function to fetch stock data
def fetch_stock_data(ticker):
    """
    Fetches 10 years of daily historical stock data from Yahoo Finance.
    """
    try:
        stock = yf.Ticker(ticker)
        data = stock.history(period="10y", interval="1d")

        if data.empty:
            print(f" No data found for {ticker}. Skipping...")
            return None

        #Selecting important features
        data = data[['Open', 'High', 'Low', 'Close', 'Volume']]

        #Reset index to make 'Date' a column
        data.reset_index(inplace=True)

        #Add Ticker column to differentiate stocks
        data["Ticker"] = ticker

        return data

    except Exception as e:
        print(f"Error fetching {ticker}: {e}")
        return None

#Fetch multiple stock data in parallel (10x Faster!)
def fetch_multiple_stocks(tickers):
    """
    Fetches historical data for multiple stocks efficiently using multi-threading.
    """
    all_data = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = executor.map(fetch_stock_data, tickers)

    for result in results:
        if result is not None:
            all_data.append(result)

    if all_data:
        df = pd.concat(all_data, ignore_index=True)

        #Data Cleaning (Remove Missing Values)
        df.dropna(inplace=True)

        #Feature Engineering: Adding Moving Averages (for better predictions)
        df['MA7'] = df.groupby('Ticker')['Close'].rolling(window=7).mean().reset_index(0, drop=True)
        df['MA21'] = df.groupby('Ticker')['Close'].rolling(window=21).mean().reset_index(0, drop=True)

        return df
    else:
        print("No data retrieved for any ticker.")
        return None

#Fetch and save data for multiple stocks
stock_data = fetch_multiple_stocks(tickers)

if stock_data is not None:
    print("Data successfully fetched!")
    print(stock_data.head())  # Display first few rows
    stock_data.to_csv("multi_stock_data_advanced.csv", index=False)  # Save optimized dataset
else:
    print("Data fetching failed.")


Data successfully fetched!
                       Date       Open       High        Low      Close  \
0 2015-04-06 00:00:00-04:00  27.797603  28.476520  27.766337  28.440786   
1 2015-04-07 00:00:00-04:00  28.505539  28.612735  28.134816  28.141516   
2 2015-04-08 00:00:00-04:00  28.105785  28.228616  27.909258  28.049953   
3 2015-04-09 00:00:00-04:00  28.105790  28.268820  27.840032  28.264353   
4 2015-04-10 00:00:00-04:00  28.128122  28.409515  27.974027  28.384949   

      Volume Ticker  MA7  MA21  
0  148776000   AAPL  NaN   NaN  
1  140049200   AAPL  NaN   NaN  
2  149316800   AAPL  NaN   NaN  
3  129936000   AAPL  NaN   NaN  
4  160752000   AAPL  NaN   NaN  


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load dataset
file_path = "multi_stock_data_advanced.csv"  # Step 1 ka saved dataset
df = pd.read_csv(file_path)

# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Sort values by Date (Important for time-series data)
df = df.sort_values(by=['Ticker', 'Date'])

# Feature Engineering: Creating New Indicators
df['Daily Return'] = df.groupby('Ticker')['Close'].pct_change()  # Percentage Change
df['Volatility'] = df.groupby('Ticker')['Daily Return'].rolling(window=7).std().reset_index(0, drop=True)  # 7-day rolling volatility

#Moving Averages for Trend Detection
df['MA50'] = df.groupby('Ticker')['Close'].rolling(window=50).mean().reset_index(0, drop=True)
df['MA200'] = df.groupby('Ticker')['Close'].rolling(window=200).mean().reset_index(0, drop=True)

#Normalization (Scaling for ML Models)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA7', 'MA21', 'MA50', 'MA200', 'Volatility', 'Daily Return']
df[scaled_features] = scaler.fit_transform(df[scaled_features])

#Save the preprocessed data
df.to_csv("preprocessed_stock_data.csv", index=False)

print("Step 2 Completed: Feature Engineering & Data Preprocessing Done!")
print(df.head())  # Display first few rows


  df['Date'] = pd.to_datetime(df['Date'])


Step 2 Completed: Feature Engineering & Data Preprocessing Done!
                        Date      Open      High       Low     Close  \
0  2015-04-06 00:00:00-04:00  0.025796  0.026319  0.026223  0.026437   
1  2015-04-07 00:00:00-04:00  0.026464  0.026447  0.026577  0.026154   
2  2015-04-08 00:00:00-04:00  0.026087  0.026086  0.026360  0.026067   
3  2015-04-09 00:00:00-04:00  0.026087  0.026124  0.026294  0.026270   
4  2015-04-10 00:00:00-04:00  0.026108  0.026256  0.026423  0.026384   

     Volume Ticker  MA7  MA21  Daily Return  Volatility  MA50  MA200  
0  0.039989   AAPL  NaN   NaN           NaN         NaN   NaN    NaN  
1  0.037625   AAPL  NaN   NaN      0.389722         NaN   NaN    NaN  
2  0.040136   AAPL  NaN   NaN      0.398039         NaN   NaN    NaN  
3  0.034886   AAPL  NaN   NaN      0.410506         NaN   NaN    NaN  
4  0.043233   AAPL  NaN   NaN      0.406643         NaN   NaN    NaN  


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

#Load the preprocessed dataset
file_path = "preprocessed_stock_data.csv"
df = pd.read_csv(file_path)

#Drop rows with NaN values (generated due to moving averages)
df.dropna(inplace=True)

#Define features (X) and target (y)
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA7', 'MA21', 'MA50', 'MA200', 'Volatility', 'Daily Return']
target = 'Close'  # Predicting future Close price

X = df[features].values
y = df[target].values

#Time-series based train-test split (80% Train, 20% Test)
split_ratio = 0.8
split_index = int(len(df) * split_ratio)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

#Save the train-test data
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)

print(f"Step 3 Completed: Train-Test Split Done!")
print(f"Training Data: {X_train.shape}, Testing Data: {X_test.shape}")


Step 3 Completed: Train-Test Split Done!
Training Data: (20398, 11), Testing Data: (5100, 11)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Conv1D, MaxPooling1D, Flatten
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
import lightgbm as lgb
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load train-test data
X_train = np.load("X_train.npy")
X_test = np.load("X_test.npy")
y_train = np.load("y_train.npy")
y_test = np.load("y_test.npy")

#LSTM Model
def build_lstm_model():
    model = Sequential([
        LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)),
        Dropout(0.2),
        LSTM(50, activation='relu'),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# CNN Model
def build_cnn_model():
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Reshape for LSTM & CNN
X_train_lstm = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Train LSTM Model
lstm_model = build_lstm_model()
lstm_model.fit(X_train_lstm, y_train, epochs=50, batch_size=16, verbose=1)

# Train CNN Model
cnn_model = build_cnn_model()
cnn_model.fit(X_train_lstm, y_train, epochs=50, batch_size=16, verbose=1)

# Train XGBoost Model
xgb = XGBRegressor(n_estimators=100, learning_rate=0.1)
xgb.fit(X_train, y_train)

# Train Random Forest Model
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X_train, y_train)

# Train SVM Model
svm = SVR(kernel='rbf')
svm.fit(X_train, y_train)

# Train LightGBM Model
lgb_model = lgb.LGBMRegressor(n_estimators=100)
lgb_model.fit(X_train, y_train)

# Train ARIMA Model
arima_order = (5,1,0)  # Change based on best parameters
arima = ARIMA(y_train, order=arima_order)
arima_fit = arima.fit()

# Predictions
y_pred_lstm = lstm_model.predict(X_test_lstm)
y_pred_cnn = cnn_model.predict(X_test_lstm)
y_pred_xgb = xgb.predict(X_test)
y_pred_rf = rf.predict(X_test)
y_pred_svm = svm.predict(X_test)
y_pred_lgb = lgb_model.predict(X_test)
y_pred_arima = arima_fit.forecast(steps=len(y_test))

# Evaluate Models
def evaluate_model(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    print(f"{model_name} - MAE: {mae:.4f}, MSE: {mse:.4f}")

evaluate_model(y_test, y_pred_lstm, "LSTM")
evaluate_model(y_test, y_pred_cnn, "CNN")
evaluate_model(y_test, y_pred_xgb, "XGBoost")
evaluate_model(y_test, y_pred_rf, "Random Forest")
evaluate_model(y_test, y_pred_svm, "SVM")
evaluate_model(y_test, y_pred_lgb, "LightGBM")
evaluate_model(y_test, y_pred_arima, "ARIMA")

print("Step 4 Completed: AI Models Training & Evaluation Done!")


Epoch 1/50


  super().__init__(**kwargs)


[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0025
Epoch 2/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - loss: 3.1808e-04
Epoch 3/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 2.9374e-04
Epoch 4/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 11ms/step - loss: 2.6775e-04
Epoch 5/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 11ms/step - loss: 2.4998e-04
Epoch 6/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - loss: 2.2910e-04
Epoch 7/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 10ms/step - loss: 2.2072e-04
Epoch 8/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 11ms/step - loss: 2.3280e-04
Epoch 9/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 12ms/step - loss: 2.3351e-04
Epoch 10/50
[1m1275/1275[0m [32

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 2.2376e-04
Epoch 2/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 4.2946e-06
Epoch 3/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 4.3495e-06
Epoch 4/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3.6850e-06
Epoch 5/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 3.2211e-06
Epoch 6/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 2.8992e-06
Epoch 7/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3.4216e-06
Epoch 8/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.6679e-06
Epoch 9/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3.3666e-06
Epoch 10/50
[1m1275/1275[0m [32m━━━━━━━━━━━━━



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004636 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2805
[LightGBM] [Info] Number of data points in the train set: 20398, number of used features: 11
[LightGBM] [Info] Start training from score 0.141026
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
LSTM - MAE: 0.0171, MSE: 0.0005
CNN - MAE: 0.0036, MSE: 0.0000
XGBoost - MAE: 0.0068, MSE: 0.0015
Random Forest - MAE: 0.0040, MSE: 0.0008
SVM - MAE: 0.0800, MSE: 0.0122
LightGBM - MAE: 0.0066, MSE: 0.0014
ARIMA - MAE: 0.2917, MSE: 0.0967
Step 4 Completed: AI Models Training & Evaluation Done!




In [None]:
import joblib
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
import warnings

# Suppress FutureWarnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Load previous results
models = {
    "LSTM": lstm_model,
    "CNN": cnn_model,
    "XGBoost": xgb,
    "Random Forest": rf,
    "SVM": svm,
    "LightGBM": lgb_model,
    "ARIMA": arima_fit
}

# Reshape X_test for LSTM & CNN models
X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))  # For LSTM
X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))   # For CNN

# Get predictions for all models
predictions = {
    "LSTM": lstm_model.predict(X_test_lstm),
    "CNN": cnn_model.predict(X_test_cnn),
    "XGBoost": xgb.predict(X_test),
    "Random Forest": rf.predict(X_test),
    "SVM": svm.predict(X_test),
    "LightGBM": lgb_model.predict(X_test),
    "ARIMA": arima_fit.forecast(steps=len(X_test))  # ARIMA needs `.forecast()`
}

# Select Best Model (Based on lowest MAE)
best_model_name = min(predictions.keys(), key=lambda x: mean_absolute_error(y_test, predictions[x]))
best_model = models[best_model_name]

print(f"Best Model Selected: {best_model_name}")

# Save models for backend deployment
joblib.dump(xgb, "xgb_model.pkl")
joblib.dump(rf, "rf_model.pkl")
joblib.dump(svm, "svm_model.pkl")
joblib.dump(lgb_model, "lgb_model.pkl")

#Use `.keras` format instead of `.h5`
lstm_model.save("lstm_model.keras")
cnn_model.save("cnn_model.keras")

# Save ARIMA model (different format)
joblib.dump(arima_fit, "arima_model.pkl")

print("Step 5 Completed: Model Files Saved for Backend Deployment!")



[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Best Model Selected: CNN
Step 5 Completed: Model Files Saved for Backend Deployment!


In [None]:
import streamlit as st
import numpy as np
import pandas as pd
import yfinance as yf
import pickle
import plotly.graph_objects as go
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler

#Models Load Karein
MODEL_PATH = "model/"
lstm_model = load_model(MODEL_PATH + "lstm_model.h5")
xgb_model = pickle.load(open(MODEL_PATH + "xgb_model.pkl", "rb"))
arima_model = pickle.load(open(MODEL_PATH + "arima_model.pkl", "rb"))

#Function: Stock Data Fetch Karna
def get_stock_data(company, period="5y"):
    data = yf.download(company, period=period)
    if data.empty:
        raise ValueError("No stock data found. Please check the ticker symbol.")
    return data

#Function: Data Preprocessing
def preprocess_data(data):
    if data.empty:
        raise ValueError("No data available for processing.")
    scaler = MinMaxScaler(feature_range=(0,1))
    data_scaled = scaler.fit_transform(data['Close'].values.reshape(-1,1))
    return data_scaled, scaler

#Function: Predict Stock Price
def predict_stock(company):
    data = get_stock_data(company)
    data_scaled, scaler = preprocess_data(data)

    #LSTM Prediction
    last_100 = data_scaled[-100:].reshape(1, -1, 1)
    lstm_pred = lstm_model.predict(last_100)[0][0]

    #XGBoost Prediction
    xgb_input = data_scaled[-xgb_model.n_features_in_:].flatten().reshape(1, -1)
    xgb_pred = xgb_model.predict(xgb_input)[0]

    #ARIMA Prediction
    arima_forecast = arima_model.forecast()
    arima_pred = arima_forecast[0] if not isinstance(arima_forecast, pd.Series) else arima_forecast.iloc[0]

    #Final Prediction Calculation
    predicted_price = np.mean([lstm_pred, xgb_pred, arima_pred])
    predicted_price = scaler.inverse_transform([[predicted_price]])[0][0]

    return round(predicted_price, 2), data

#Streamlit Web App (UI)
st.set_page_config(page_title="AI Stock Predictor", layout="wide")

st.title(" Advanced AI-Based Stock Price Prediction")
st.sidebar.header("Enter Stock Symbol")

#User Input for Stock Symbol
company = st.sidebar.text_input("Stock Symbol (e.g., AAPL, TSLA, MSFT)", "AAPL").upper()

if st.sidebar.button("Predict Stock Price"):
    st.subheader(f"Stock Price Prediction for {company}")
    try:
        predicted_price, stock_data = predict_stock(company)

        # Display Predicted Price
        st.metric(label="Predicted Price", value=f"${predicted_price}")

        # Plot Historical Stock Data (Candlestick Chart)
        st.subheader("Historical Stock Prices (Real-Time)")
        fig = go.Figure()
        fig.add_trace(go.Candlestick(
            x=stock_data.index,
            open=stock_data["Open"],
            high=stock_data["High"],
            low=stock_data["Low"],
            close=stock_data["Close"],
            name="Market Data"
        ))
        fig.update_layout(title=f"{company} Stock Price", xaxis_title="Date", yaxis_title="Price", template="plotly_dark")
        st.plotly_chart(fig)

    except ValueError as e:
        st.error(str(e))

st.sidebar.write("Developed using **Streamlit & AI Models**")