<a href="https://colab.research.google.com/github/dwivedianiket/stockMarketAnalysis/blob/main/Real_Time_Stock_Market_Analysis_and_Prediction_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade pip
!pip install dash
!pip install plotly
!pip install pyngrok
!pip install --upgrade pip setuptools




Data Collection

In [4]:
import requests
import pandas as pd
import time
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from dash import Dash, dcc, html
import plotly.graph_objs as go
from dash.dependencies import Input, Output
from pyngrok import ngrok

API_KEY = 'ISFQL7OJCVVEX6GC'  #Alpha Vantage API Key
symbol = 'AAPL'  # Stock symbol

def fetch_stock_data():
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={symbol}&interval=1min&apikey={API_KEY}'
    response = requests.get(url)
    data = response.json()
    df = pd.DataFrame(data['Time Series (1min)']).transpose()
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    df.index = pd.to_datetime(df.index)
    df = df.astype(float)
    return df

df = fetch_stock_data()
print(df.head())

                        Open    High      Low     Close  Volume
2024-08-30 19:59:00  229.200  229.20  229.100  229.1500   237.0
2024-08-30 19:58:00  229.190  229.22  229.130  229.1991   618.0
2024-08-30 19:57:00  229.170  229.20  229.040  229.2000   458.0
2024-08-30 19:56:00  229.140  229.19  229.120  229.1600   792.0
2024-08-30 19:55:00  229.095  229.14  229.095  229.1300   152.0


Data Preprocessing

In [5]:
def preprocess_data(df):
    # Calculate percentage change
    df['Pct_Change'] = df['Close'].pct_change()

    # Add lagged features
    for lag in [1, 5, 10]:
        df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
        df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)

    # Calculate moving averages and volatility
    df['Price_Change'] = df['Close'].diff()
    df['SMA_10'] = df['Close'].rolling(window=10).mean()
    df['SMA_50'] = df['Close'].rolling(window=50).mean()
    df['Volatility'] = df['Close'].rolling(window=10).std()

    # Drop rows with NaN values
    df = df.dropna()

    # Normalize features
    scaler = StandardScaler()
    features = ['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50', 'Volatility', 'Pct_Change']
    df.loc[:, features] = scaler.fit_transform(df[features])

    return df

df = preprocess_data(df)
print(df.head())

                         Open      High       Low    Close    Volume  \
2024-08-30 19:10:00  1.737768  1.571268  2.222214  229.180  0.207642   
2024-08-30 19:09:00  1.643864  1.571268  2.126317  229.170 -0.382075   
2024-08-30 19:08:00  1.643864  1.571268  2.126317  229.180 -0.407246   
2024-08-30 19:07:00  1.456057  1.571268  1.742729  229.175  0.497107   
2024-08-30 19:06:00  1.456057  1.358484  1.742729  229.150 -0.299371   

                     Pct_Change  Close_Lag_1  Volume_Lag_1  Close_Lag_5  \
2024-08-30 19:10:00    1.383111      229.135         463.0      229.140   
2024-08-30 19:09:00   -0.247002      229.180         352.0      229.190   
2024-08-30 19:08:00    0.345684      229.170          24.0      229.140   
2024-08-30 19:07:00   -0.098834      229.180          10.0      229.140   
2024-08-30 19:06:00   -0.691523      229.175         513.0      229.135   

                     Volume_Lag_5  Close_Lag_10  Volume_Lag_10  Price_Change  \
2024-08-30 19:10:00          13.0   

Model Development

In [6]:
def train_model(df):
    X = df[['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50', 'Volatility', 'Pct_Change']].values
    y = df['Close'].values

    # TimeSeriesSplit for cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    model = RandomForestRegressor(random_state=42)
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }

    grid_search = GridSearchCV(model, param_grid, cv=tscv, scoring='neg_mean_squared_error')
    grid_search.fit(X, y)

    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_

    print(f'Best Parameters: {best_params}')

    # Evaluate model
    predictions = best_model.predict(X)
    mse = mean_squared_error(y, predictions)
    print(f'Mean Squared Error on full dataset: {mse}')

    return best_model

model = train_model(df)

Best Parameters: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
Mean Squared Error on full dataset: 7.662207047131854e-05


Real-Time Prediction

In [7]:
def predict_real_time(df, model):
    X_real_time = df[['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50', 'Volatility', 'Pct_Change']].values[-1:]
    prediction = model.predict(X_real_time)
    return prediction

df = fetch_stock_data()
df = preprocess_data(df)
model = train_model(df)

# Fetch new data and make a prediction
latest_data = fetch_stock_data()
processed_data = preprocess_data(latest_data)
prediction = predict_real_time(processed_data, model)
print(f'Real-Time Prediction: {prediction[0]}')

Best Parameters: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
Mean Squared Error on full dataset: 7.662207047131854e-05
Real-Time Prediction: 229.0502785515872


Visualization

In [8]:
app = Dash(__name__)
server = app.server
app.layout = html.Div([
    dcc.Graph(id='price-graph'),
    dcc.Graph(id='volume-graph'),
    dcc.Graph(id='sma-10-graph'),
    dcc.Graph(id='sma-50-graph'),
    dcc.Interval(id='interval-component', interval=1*60000, n_intervals=0)  # Update every 1 minute
])
ngrok.set_auth_token('2lWRfVpAMrVStCm2gsCfJfxPchP_6DF2rUafLYkkt9EKiKtNW')

@app.callback(
    Output('price-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_price_graph(n):
    latest_data = fetch_stock_data()
    processed_data = preprocess_data(latest_data)
    prediction = predict_real_time(processed_data, model)

    trace1 = go.Scatter(
        x=processed_data.index,
        y=processed_data['Close'],
        mode='lines',
        name='Actual Price'
    )

    trace2 = go.Scatter(
        x=processed_data.index,
        y=processed_data['SMA_10'],
        mode='lines',
        name='SMA 10'
    )

    trace3 = go.Scatter(
        x=processed_data.index,
        y=processed_data['SMA_50'],
        mode='lines',
        name='SMA 50'
    )

    trace4 = go.Scatter(
        x=[processed_data.index[-1]],
        y=[prediction[0]],
        mode='markers',
        name='Predicted Price',
        marker=dict(size=10, color='red')
    )

    return {
        'data': [trace1, trace2, trace3, trace4],
        'layout': go.Layout(title=f'Real-Time Stock Price and SMA for {symbol}', xaxis_title='Time', yaxis_title='Price')
    }

@app.callback(
    Output('volume-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_volume_graph(n):
    latest_data = fetch_stock_data()
    processed_data = preprocess_data(latest_data)

    trace1 = go.Bar(
        x=processed_data.index,
        y=processed_data['Volume'],
        name='Volume'
    )

    return {
        'data': [trace1],
        'layout': go.Layout(title=f'Real-Time Trading Volume for {symbol}', xaxis_title='Time', yaxis_title='Volume')
    }
@app.callback(
    Output('sma-10-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_sma_10_graph(n):
    latest_data = fetch_stock_data()
    processed_data = preprocess_data(latest_data)

    trace1 = go.Scatter(
        x=processed_data.index,
        y=processed_data['SMA_10'],
        mode='lines',
        name='SMA 10'
    )

    return {
        'data': [trace1],
        'layout': go.Layout(title=f'SMA 10 for {symbol}', xaxis_title='Time', yaxis_title='Price')
    }

@app.callback(
    Output('sma-50-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_sma_50_graph(n):
    latest_data = fetch_stock_data()
    processed_data = preprocess_data(latest_data)

    trace1 = go.Scatter(
        x=processed_data.index,
        y=processed_data['SMA_50'],
        mode='lines',
        name='SMA 50'
    )

    return {
        'data': [trace1],
        'layout': go.Layout(title=f'SMA 50 for {symbol}', xaxis_title='Time', yaxis_title='Price')
    }


if __name__ == '__main__':
    app.run_server(debug=True)

<IPython.core.display.Javascript object>