<a href="https://colab.research.google.com/github/dwivedianiket/stockMarketAnalysis/blob/main/Real_Time_Stock_Market_Analysis_and_Prediction_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install dash
!pip install plotly



Data Collection

In [None]:
import requests
import pandas as pd
import time
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from dash import Dash, dcc, html
import plotly.graph_objs as go
from dash.dependencies import Input, Output

API_KEY = 'HEF3C8ZICXGWL1C4'  #Alpha Vantage API Key
symbol = 'AAPL'  # Stock symbol

def fetch_stock_data():
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={symbol}&interval=1min&apikey={API_KEY}'
    response = requests.get(url)
    data = response.json()
    df = pd.DataFrame(data['Time Series (1min)']).transpose()
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    df.index = pd.to_datetime(df.index)
    df = df.astype(float)
    return df

df = fetch_stock_data()
print(df.head())


                        Open     High      Low    Close  Volume
2024-08-27 19:59:00  227.575  227.650  227.250  227.450  3529.0
2024-08-27 19:58:00  227.471  227.575  227.471  227.575    72.0
2024-08-27 19:57:00  227.650  227.650  227.520  227.550    81.0
2024-08-27 19:56:00  227.525  227.650  227.400  227.520    76.0
2024-08-27 19:55:00  227.525  227.650  227.400  227.400   369.0


Data Preprocessing

In [None]:
def preprocess_data(df):
    # Calculate percentage change
    df['Pct_Change'] = df['Close'].pct_change()

    # Add lagged features
    for lag in [1, 5, 10]:
        df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
        df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)

    # Calculate moving averages and volatility
    df['Price_Change'] = df['Close'].diff()
    df['SMA_10'] = df['Close'].rolling(window=10).mean()
    df['SMA_50'] = df['Close'].rolling(window=50).mean()
    df['Volatility'] = df['Close'].rolling(window=10).std()

    # Drop rows with NaN values
    df = df.dropna()

    # Normalize features
    scaler = StandardScaler()
    features = ['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50', 'Volatility', 'Pct_Change']
    df[features] = scaler.fit_transform(df[features])

    return df


df = preprocess_data(df)
print(df.head())


                         Open      High       Low    Close    Volume  \
2024-08-27 19:10:00  1.055438  1.530451  2.147646  227.770 -0.566050   
2024-08-27 19:09:00  0.559790  0.958110  1.597175  227.750 -0.594756   
2024-08-27 19:08:00  2.170645  1.673536  1.597175  227.670  0.226232   
2024-08-27 19:07:00  1.922822  1.673536  1.363225  227.725  2.258608   
2024-08-27 19:06:00  0.931526  1.530451  1.321939  227.650 -0.181391   

                     Pct_Change  Close_Lag_1  Volume_Lag_1  Close_Lag_5  \
2024-08-27 19:10:00    0.257204      227.755         123.0      227.790   
2024-08-27 19:09:00   -0.236710      227.770          20.0      227.755   
2024-08-27 19:08:00   -1.083495      227.750           5.0      227.755   
2024-08-27 19:07:00    0.821988      227.670         434.0      227.710   
2024-08-27 19:06:00   -1.013048      227.725        1496.0      227.755   

                     Volume_Lag_5  Close_Lag_10  Volume_Lag_10  Price_Change  \
2024-08-27 19:10:00         106.0   



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Model Development

In [None]:
def train_model(df):
    X = df[['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50', 'Volatility', 'Pct_Change']].values
    y = df['Close'].values

    # TimeSeriesSplit for cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    model = RandomForestRegressor(random_state=42)
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }

    grid_search = GridSearchCV(model, param_grid, cv=tscv, scoring='neg_mean_squared_error')
    grid_search.fit(X, y)

    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_

    print(f'Best Parameters: {best_params}')

    # Evaluate model
    predictions = best_model.predict(X)
    mse = mean_squared_error(y, predictions)
    print(f'Mean Squared Error on full dataset: {mse}')

    return best_model

model = train_model(df)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Best Parameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}
Mean Squared Error on full dataset: 0.0010288190741767865


Real-Time Prediction

In [None]:
def predict_real_time(df, model):
    X_real_time = df[['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50', 'Volatility', 'Pct_Change']].values[-1:]
    prediction = model.predict(X_real_time)
    return prediction

df = fetch_stock_data()
df = preprocess_data(df)
model = train_model(df)

# Fetch new data and make a prediction
latest_data = fetch_stock_data()
processed_data = preprocess_data(latest_data)
prediction = predict_real_time(processed_data, model)
print(f'Real-Time Prediction: {prediction[0]}')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Best Parameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}
Mean Squared Error on full dataset: 0.0010288190741767865
Real-Time Prediction: 227.5928727376088




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Visualization

In [15]:
app = Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='price-graph'),
    dcc.Graph(id='volume-graph'),
    dcc.Interval(id='interval-component', interval=1*60000, n_intervals=0)  # Update every 1 minute
])

@app.callback(
    Output('price-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_price_graph(n):
    latest_data = fetch_stock_data()
    processed_data = preprocess_data(latest_data)
    prediction = predict_real_time(processed_data, model)

    trace1 = go.Scatter(
        x=processed_data.index,
        y=processed_data['Close'],
        mode='lines',
        name='Actual Price'
    )

    trace2 = go.Scatter(
        x=processed_data.index,
        y=processed_data['SMA_10'],
        mode='lines',
        name='SMA 10'
    )

    trace3 = go.Scatter(
        x=processed_data.index,
        y=processed_data['SMA_50'],
        mode='lines',
        name='SMA 50'
    )

    trace4 = go.Scatter(
        x=[processed_data.index[-1]],
        y=[prediction[0]],
        mode='markers',
        name='Predicted Price',
        marker=dict(size=10, color='red')
    )

    return {
        'data': [trace1, trace2, trace3, trace4],
        'layout': go.Layout(title=f'Real-Time Stock Price and SMA for {symbol}', xaxis_title='Time', yaxis_title='Price')
    }

@app.callback(
    Output('volume-graph', 'figure'),
    [Input('interval-component', 'n_intervals')]
)
def update_volume_graph(n):
    latest_data = fetch_stock_data()
    processed_data = preprocess_data(latest_data)

    trace1 = go.Bar(
        x=processed_data.index,
        y=processed_data['Volume'],
        name='Volume'
    )

    return {
        'data': [trace1],
        'layout': go.Layout(title=f'Real-Time Trading Volume for {symbol}', xaxis_title='Time', yaxis_title='Volume')
    }

if __name__ == '__main__':
    app.run_server(debug=True)

<IPython.core.display.Javascript object>