In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import yfinance as yf
import joblib
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

class StockPredictor:
    """Serve hybrid (regression + classification) stock forecasts.

    Holds per-symbol regression models, classification models and feature
    scalers loaded from a joblib bundle, rebuilds the technical-indicator
    features from freshly downloaded market data, and turns the model outputs
    into a day-by-day forecast.
    """

    def __init__(self):
        # The three mappings below are looked up by ticker symbol.
        self.regression_models = {}
        self.classification_models = {}
        self.scalers = {}
        # Ordered column names selected from the feature frame at predict time.
        self.feature_columns = []

    def create_features(self, df):
        """Create technical indicators and features - MUST match training notebook.

        Args:
            df: DataFrame with at least 'Close', 'High', 'Low' and 'Volume'
                columns. The feature columns are added to this frame in place.

        Returns:
            The same DataFrame object, with the feature columns added. Leading
            rows contain NaN wherever a rolling window or lag is not yet full.
        """
        # Price-based features
        df['returns'] = df['Close'].pct_change()
        df['volatility'] = df['returns'].rolling(window=10).std()

        # Moving averages
        df['ma_5'] = df['Close'].rolling(window=5).mean()
        df['ma_10'] = df['Close'].rolling(window=10).mean()
        df['ma_20'] = df['Close'].rolling(window=20).mean()

        # RSI over a 14-row window, using simple rolling means of the gains
        # and losses.
        delta = df['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / loss
        df['rsi'] = 100 - (100 / (1 + rs))

        # MACD: difference of the 12- and 26-span EMAs, plus a 9-span signal
        exp1 = df['Close'].ewm(span=12).mean()
        exp2 = df['Close'].ewm(span=26).mean()
        df['macd'] = exp1 - exp2
        df['macd_signal'] = df['macd'].ewm(span=9).mean()

        # Bollinger Bands (20-row mean +/- 2 standard deviations)
        df['bb_middle'] = df['Close'].rolling(window=20).mean()
        bb_std = df['Close'].rolling(window=20).std()
        df['bb_upper'] = df['bb_middle'] + (bb_std * 2)
        df['bb_lower'] = df['bb_middle'] - (bb_std * 2)
        # Where Close sits between the lower and upper band
        df['bb_position'] = (df['Close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'])

        # Volume features
        df['volume_ma'] = df['Volume'].rolling(window=10).mean()
        df['volume_ratio'] = df['Volume'] / df['volume_ma']

        # Price position features
        df['high_low_ratio'] = df['High'] / df['Low']
        df['close_to_high'] = df['Close'] / df['High']
        df['close_to_low'] = df['Close'] / df['Low']

        # Lagged close and volume for the previous 1..5 rows
        for i in range(1, 6):
            df[f'close_lag_{i}'] = df['Close'].shift(i)
            df[f'volume_lag_{i}'] = df['Volume'].shift(i)

        return df

    def load_models(self, filename):
        """Load models, scalers and feature columns from a joblib bundle.

        The bundle must be a mapping with the keys 'regression_models',
        'classification_models', 'scalers' and 'feature_columns'. The
        predictor's state is replaced only when all four are present, so a
        failed load leaves previously loaded models untouched.

        Args:
            filename: Path of the bundle to load.

        Returns:
            A ``(success, message)`` tuple; ``success`` is False and
            ``message`` describes the error when loading fails.
        """
        try:
            # SECURITY: joblib.load unpickles the file, which can execute
            # arbitrary code. Only load bundles from a trusted source.
            model_data = joblib.load(filename)
            # Read every required entry before touching self, so a bundle with
            # a missing key cannot leave the predictor half-updated.
            regression_models = model_data['regression_models']
            classification_models = model_data['classification_models']
            scalers = model_data['scalers']
            feature_columns = model_data['feature_columns']
        except Exception as e:
            return False, f"Error loading models: {str(e)}"

        self.regression_models = regression_models
        self.classification_models = classification_models
        self.scalers = scalers
        self.feature_columns = feature_columns
        return True, "Models loaded successfully"

    def predict(self, symbol, days=1):
        """Make predictions using hybrid approach.

        Downloads the last three months of history for ``symbol``, builds the
        features, scores the most recent row with the symbol's regression and
        classification models, and rolls the result forward ``days`` steps.

        Note: every horizon day is scored on the same latest feature row, so
        the raw regression output is identical for each day; only the
        strong-signal adjustment (see ``_build_forecast``) changes from one
        day to the next.

        Args:
            symbol: Ticker symbol; must have a loaded model.
            days: Number of forecast steps to produce.

        Returns:
            ``(predictions, None)`` on success, where ``predictions`` is a list
            of dicts with the keys 'day', 'predicted_price', 'direction',
            'confidence' and 'change_pct'; ``(None, message)`` on failure.
        """
        try:
            if symbol not in self.regression_models:
                return None, "Model not trained for this symbol"
            if symbol not in self.classification_models or symbol not in self.scalers:
                return None, f"Incomplete model bundle for {symbol} (classifier or scaler missing)"

            # Fetch recent data
            ticker = yf.Ticker(symbol)
            df = ticker.history(period="3mo")
            if df.empty:
                return None, f"No market data returned for {symbol}"

            # Prepare features
            df = self.create_features(df)

            # Get latest features (kept 2-D: one row, one column per feature)
            latest_data = df[self.feature_columns].iloc[-1:].values

            # Check for NaN values
            if np.isnan(latest_data).any():
                return None, "Insufficient data for prediction (NaN values found)"

            # Scale features
            latest_scaled = self.scalers[symbol].transform(latest_data)

            # The model inputs do not change across horizon days, so score
            # them once instead of once per day.
            base_price = self.regression_models[symbol].predict(latest_scaled)[0]
            classifier = self.classification_models[symbol]
            confidence = max(classifier.predict_proba(latest_scaled)[0])
            direction = classifier.predict(latest_scaled)[0]

            predictions = self._build_forecast(
                base_price, direction, confidence, df['Close'].iloc[-1], days
            )
            return predictions, None

        except Exception as e:
            return None, f"Error making prediction: {str(e)}"

    @staticmethod
    def _build_forecast(base_price, direction, confidence, current_price, days):
        """Roll one set of model outputs forward into a day-by-day forecast.

        Args:
            base_price: Price predicted by the regression model.
            direction: Class predicted by the classifier (1 is treated as up,
                0 as down).
            confidence: Highest class probability reported by the classifier.
            current_price: Latest observed close; the reference for day 1.
            days: Number of forecast steps.

        Returns:
            A list with one dict per day. Each day's 'change_pct' is relative
            to the previous step (the observed close for day 1, the previous
            predicted price afterwards).
        """
        predictions = []
        for day in range(1, days + 1):
            price_pred = base_price

            # Hybrid approach: a confident direction call forces at least a 2%
            # move in that direction relative to the previous step.
            if direction == 0 and confidence > 0.6:  # Strong downward signal
                price_pred = min(price_pred, current_price * 0.98)
            elif direction == 1 and confidence > 0.6:  # Strong upward signal
                price_pred = max(price_pred, current_price * 1.02)

            predictions.append({
                'day': day,
                'predicted_price': price_pred,
                'direction': 'Up' if direction == 1 else 'Down',
                'confidence': confidence,
                'change_pct': ((price_pred - current_price) / current_price) * 100
            })

            current_price = price_pred  # Use prediction for next iteration

        return predictions

    def get_model_info(self):
        """Return a human-readable list of the symbols with loaded models."""
        if not self.regression_models:
            return "No models loaded."

        header = f"Loaded models for {len(self.regression_models)} symbols:\n"
        return header + "".join(f"- {symbol}\n" for symbol in self.regression_models)

def plot_predictions(symbol, predictions, historical_data):
    """Create interactive plot for predictions.

    Builds a two-row figure: the last 30 historical closes followed by the
    predicted prices on top, and a bar chart of per-day confidence below.

    Args:
        symbol: Ticker symbol, used in the subplot title.
        predictions: List of dicts with the keys 'day', 'predicted_price',
            'direction' and 'confidence'.
        historical_data: Non-empty DataFrame with a datetime index and a
            'Close' column.

    Returns:
        The assembled plotly figure.
    """
    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=(f'{symbol} Price Prediction', 'Prediction Confidence'),
        vertical_spacing=0.1
    )

    # Historical prices
    fig.add_trace(
        go.Scatter(
            x=historical_data.index[-30:],
            y=historical_data['Close'].iloc[-30:],
            mode='lines',
            name='Historical Price',
            line=dict(color='blue')
        ),
        row=1, col=1
    )

    # Predictions: place each forecast step on a business day, so points are
    # not drawn on weekends when the market is closed. (Exchange holidays are
    # not accounted for.)
    pred_dates = pd.date_range(
        start=historical_data.index[-1] + pd.offsets.BDay(1),
        periods=len(predictions),
        freq='B'
    )

    pred_prices = [p['predicted_price'] for p in predictions]

    fig.add_trace(
        go.Scatter(
            x=pred_dates,
            y=pred_prices,
            mode='lines+markers',
            name='Predicted Price',
            line=dict(color='red', dash='dash'),
            marker=dict(size=8)
        ),
        row=1, col=1
    )

    # Confidence levels, colored by predicted direction
    confidences = [p['confidence'] for p in predictions]
    colors = ['green' if p['direction'] == 'Up' else 'red' for p in predictions]

    fig.add_trace(
        go.Bar(
            x=[f"Day {p['day']}" for p in predictions],
            y=confidences,
            name='Prediction Confidence',
            marker_color=colors
        ),
        row=2, col=1
    )

    fig.update_layout(height=600, showlegend=True)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_yaxes(title_text="Price ($)", row=1, col=1)
    fig.update_xaxes(title_text="Prediction Day", row=2, col=1)
    fig.update_yaxes(title_text="Confidence", row=2, col=1)

    return fig

def _style_cells(styler, func, subset):
    """Apply an element-wise style, preferring Styler.map over the deprecated applymap."""
    apply_elementwise = getattr(styler, 'map', None) or styler.applymap
    return apply_elementwise(func, subset=subset)


def _direction_style(value):
    """Return the CSS for a direction cell: green for 'Up', red for 'Down'."""
    return 'color: green' if value == 'Up' else 'color: red' if value == 'Down' else ''


def _change_style(value):
    """Return the CSS for a percentage-change cell: bold green/red by sign."""
    return 'color: green; font-weight: bold' if value > 0 else 'color: red; font-weight: bold' if value < 0 else ''


def _bar_color(direction, confidence):
    """Pick the summary-chart bar color; darker shades mean confidence above 0.6."""
    if direction == 'Up' and confidence > 0.6:
        return 'darkgreen'
    if direction == 'Up':
        return 'lightgreen'
    if direction == 'Down' and confidence > 0.6:
        return 'darkred'
    return 'lightcoral'


def _render_individual_tab(predictor):
    """Render the single-symbol form and, on request, its table, chart and metrics."""
    st.header("Make Individual Predictions")

    col1, col2 = st.columns(2)
    with col1:
        prediction_symbol = st.selectbox(
            "Select Stock Symbol",
            options=list(predictor.regression_models.keys()),
            help="Choose from loaded models"
        )

    with col2:
        prediction_horizon = st.slider(
            "Prediction Days",
            min_value=1,
            max_value=10,
            value=5,
            help="Number of days to predict ahead"
        )

    col1, col2 = st.columns([1, 4])
    with col1:
        predict_button = st.button("🎯 Generate Predictions", type="primary")

    if not predict_button:
        return

    with st.spinner("Generating predictions..."):
        predictions, error = predictor.predict(prediction_symbol, prediction_horizon)

        if error:
            st.error(f"❌ {error}")
            return

        # Display predictions table
        pred_df = pd.DataFrame(predictions)
        pred_df['predicted_price'] = pred_df['predicted_price'].round(2)
        pred_df['confidence'] = pred_df['confidence'].round(3)
        pred_df['change_pct'] = pred_df['change_pct'].round(2)

        st.subheader(f"📊 Predictions for {prediction_symbol}")

        # Style the dataframe
        styled_df = _style_cells(
            pred_df.style.format({
                'predicted_price': '${:.2f}',
                'confidence': '{:.1%}',
                'change_pct': '{:+.2f}%'
            }),
            _direction_style,
            ['direction']
        )
        st.dataframe(styled_df, use_container_width=True)

        # Fetch historical data for plotting
        ticker = yf.Ticker(prediction_symbol)
        hist_data = ticker.history(period="1mo")
        if hist_data.empty:
            # Without history there is nothing to chart and no current price.
            st.warning("Could not fetch recent price history; chart and summary metrics are unavailable.")
            return

        # Create and display plot
        fig = plot_predictions(prediction_symbol, predictions, hist_data)
        st.plotly_chart(fig, use_container_width=True)

        # Summary metrics
        col1, col2, col3, col4 = st.columns(4)

        current_price = hist_data['Close'].iloc[-1]
        final_price = predictions[-1]['predicted_price']
        total_change = ((final_price - current_price) / current_price) * 100
        avg_confidence = np.mean([p['confidence'] for p in predictions])

        with col1:
            st.metric("Current Price", f"${current_price:.2f}")
        with col2:
            st.metric("Final Predicted Price", f"${final_price:.2f}")
        with col3:
            st.metric("Total Expected Change", f"{total_change:+.2f}%")
        with col4:
            st.metric("Average Confidence", f"{avg_confidence:.1%}")


def _render_batch_tab(predictor):
    """Render the batch form and, on request, forecasts for every loaded symbol."""
    st.header("Batch Analysis")

    prediction_days_batch = st.selectbox(
        "Prediction Horizon for Batch",
        options=[1, 3, 5, 7, 10],
        index=2
    )

    col1, col2 = st.columns([1, 4])
    with col1:
        batch_button = st.button("🚀 Run Batch Analysis", type="primary")

    if not batch_button:
        return

    batch_results = []
    skipped = []

    progress_bar = st.progress(0)
    status_text = st.empty()
    symbols = list(predictor.regression_models.keys())

    for i, symbol in enumerate(symbols):
        status_text.text(f"Analyzing {symbol}... ({i+1}/{len(symbols)})")

        predictions, error = predictor.predict(symbol, prediction_days_batch)

        if error or not predictions:
            skipped.append(f"{symbol}: {error or 'no predictions returned'}")
        else:
            # Get current price
            ticker = yf.Ticker(symbol)
            current_data = ticker.history(period="1d")
            if current_data.empty:
                skipped.append(f"{symbol}: current price unavailable")
            else:
                current_price = current_data['Close'].iloc[-1]
                final_pred = predictions[-1]
                final_price = final_pred['predicted_price']
                batch_results.append({
                    'Symbol': symbol,
                    'Current Price': current_price,
                    'Predicted Price': final_price,
                    # Change over the whole horizon. The per-day 'change_pct'
                    # of the last prediction only measures the final step
                    # relative to the previous predicted day.
                    'Expected Change %': ((final_price - current_price) / current_price) * 100,
                    'Direction': final_pred['direction'],
                    'Confidence': final_pred['confidence']
                })

        progress_bar.progress((i + 1) / len(symbols))

    status_text.text("Analysis completed!")

    if skipped:
        # Surface failures instead of dropping them silently.
        st.warning("Skipped symbols:\n\n" + "\n".join(f"- {item}" for item in skipped))

    if not batch_results:
        st.warning("No successful predictions in batch analysis.")
        return

    batch_df = pd.DataFrame(batch_results)

    # Style the batch results
    styled_batch = batch_df.style.format({
        'Current Price': '${:.2f}',
        'Predicted Price': '${:.2f}',
        'Expected Change %': '{:+.2f}%',
        'Confidence': '{:.1%}'
    })
    styled_batch = _style_cells(styled_batch, _direction_style, ['Direction'])
    styled_batch = _style_cells(styled_batch, _change_style, ['Expected Change %'])

    st.subheader(f"📈 Batch Analysis Results ({prediction_days_batch} days ahead)")
    st.dataframe(styled_batch, use_container_width=True)

    # Summary statistics
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        avg_change = batch_df['Expected Change %'].mean()
        st.metric("Average Expected Change", f"{avg_change:+.2f}%")

    with col2:
        positive_predictions = (batch_df['Expected Change %'] > 0).sum()
        st.metric("Positive Predictions", f"{positive_predictions}/{len(batch_df)}")

    with col3:
        avg_confidence = batch_df['Confidence'].mean()
        st.metric("Average Confidence", f"{avg_confidence:.1%}")

    with col4:
        high_confidence = (batch_df['Confidence'] > 0.6).sum()
        st.metric("High Confidence (>60%)", f"{high_confidence}/{len(batch_df)}")

    # Top movers. A signed format is used so that a negative value in the
    # "gainers" list is not rendered as "+-1.2%".
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("🚀 Top Potential Gainers")
        top_gainers = batch_df.nlargest(3, 'Expected Change %')
        for _, row in top_gainers.iterrows():
            st.write(f"**{row['Symbol']}**: {row['Expected Change %']:+.1f}% (Confidence: {row['Confidence']:.1%})")

    with col2:
        st.subheader("📉 Top Potential Decliners")
        top_decliners = batch_df.nsmallest(3, 'Expected Change %')
        for _, row in top_decliners.iterrows():
            st.write(f"**{row['Symbol']}**: {row['Expected Change %']:+.1f}% (Confidence: {row['Confidence']:.1%})")

    # Create a summary chart
    st.subheader("📊 Prediction Summary Chart")

    fig = go.Figure()

    # Color coding based on direction and confidence
    colors = [
        _bar_color(direction, confidence)
        for direction, confidence in zip(batch_df['Direction'], batch_df['Confidence'])
    ]

    fig.add_trace(go.Bar(
        x=batch_df['Symbol'],
        y=batch_df['Expected Change %'],
        marker_color=colors,
        text=[f"{conf:.1%}" for conf in batch_df['Confidence']],
        textposition='outside',
        hovertemplate='<b>%{x}</b><br>' +
                      'Expected Change: %{y:+.2f}%<br>' +
                      'Confidence: %{text}<br>' +
                      '<extra></extra>'
    ))

    fig.update_layout(
        title=f"Expected Price Changes ({prediction_days_batch} days ahead)",
        xaxis_title="Stock Symbol",
        yaxis_title="Expected Change (%)",
        showlegend=False,
        height=400
    )

    fig.add_hline(y=0, line_dash="dash", line_color="gray")
    st.plotly_chart(fig, use_container_width=True)


def main():
    """Run the Streamlit app: model loading sidebar, prediction tabs and footer.

    The predictor is kept in ``st.session_state`` so loaded models survive
    Streamlit reruns. Until models are loaded, only the sidebar and a hint
    are shown.
    """
    st.set_page_config(page_title="Stock Market Predictor", layout="wide")

    st.title("🔮 Stock Market Predictor")
    st.markdown("*Load Models and Make Predictions*")

    # Initialize predictor
    if 'predictor' not in st.session_state:
        st.session_state.predictor = StockPredictor()
        st.session_state.models_loaded = False
    predictor = st.session_state.predictor

    # Sidebar for model management
    st.sidebar.header("Model Management")

    # Load models
    if st.sidebar.button("📂 Load Trained Models"):
        filename = "stock_models.pkl"
        success, message = predictor.load_models(filename)
        if success:
            st.sidebar.success(message)
            st.session_state.models_loaded = True
            st.rerun()
        else:
            st.sidebar.error(message)
            st.session_state.models_loaded = False

    # Display loaded models
    st.sidebar.subheader("Loaded Models")
    model_info = predictor.get_model_info()
    st.sidebar.text(model_info)

    # Main interface
    if not st.session_state.models_loaded or not predictor.regression_models:
        st.warning("⚠️ No models loaded. Please load trained models first using the sidebar.")
        st.info("💡 Make sure you have trained models using the **Training Notebook** and saved them as 'stock_models.pkl'")
        return

    # Create tabs
    tab1, tab2 = st.tabs(["Individual Predictions", "Batch Analysis"])

    with tab1:
        _render_individual_tab(predictor)

    with tab2:
        _render_batch_tab(predictor)

    # Risk Warning
    st.markdown("---")
    st.warning(
        """
        ⚠️ **Investment Disclaimer**: This tool is for educational and research purposes only.
        Stock market predictions are inherently uncertain and should not be used as the sole basis for investment decisions.
        Always consult with financial professionals and conduct your own research before making investment decisions.
        Past performance does not guarantee future results.
        """
    )

    # Model Information
    with st.expander("ℹ️ Model Information"):
        st.markdown("""
        **Hybrid Prediction Approach:**
        - **Regression Component**: Predicts actual stock prices using Random Forest
        - **Classification Component**: Predicts price direction (up/down) with confidence scores
        - **Feature Engineering**: Uses technical indicators, moving averages, volatility measures, and volume analysis
        - **Confidence Adjustment**: High-confidence directional predictions adjust the final price prediction

        **Key Features:**
        - RSI, MACD, Bollinger Bands
        - Moving averages (5, 10, 20-day)
        - Volatility and volume ratios
        - Lagged price and volume features

        **Interpretation:**
        - **Confidence > 60%**: High confidence prediction
        - **Green bars**: Upward predictions
        - **Red bars**: Downward predictions
        - **Darker colors**: Higher confidence levels
        """)

# Script entry point: launch the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

2025-05-28 11:15:18,692 - __main__ - INFO - Starting stock prediction system
2025-05-28 11:15:18,695 - __main__ - INFO - Downloading data for AAPL


📝 Logging to: stock_predictions\prediction.log

📊 Processing AAPL...


2025-05-28 11:15:19,234 - __main__ - INFO - Successfully downloaded 1255 records for AAPL
2025-05-28 11:15:19,245 - __main__ - ERROR - Model training failed: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:19,247 - __main__ - ERROR - Failed to process AAPL: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:19,249 - __main__ - INFO - Downloading data for MSFT



📊 Processing MSFT...


2025-05-28 11:15:19,918 - __main__ - INFO - Successfully downloaded 1255 records for MSFT
2025-05-28 11:15:19,928 - __main__ - ERROR - Model training failed: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:19,930 - __main__ - ERROR - Failed to process MSFT: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:19,931 - __main__ - INFO - Downloading data for GOOGL



📊 Processing GOOGL...


2025-05-28 11:15:20,292 - __main__ - INFO - Successfully downloaded 1255 records for GOOGL
2025-05-28 11:15:20,304 - __main__ - ERROR - Model training failed: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:20,305 - __main__ - ERROR - Failed to process GOOGL: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:20,307 - __main__ - INFO - Downloading data for AMZN



📊 Processing AMZN...


2025-05-28 11:15:20,684 - __main__ - INFO - Successfully downloaded 1255 records for AMZN
2025-05-28 11:15:20,697 - __main__ - ERROR - Model training failed: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:20,699 - __main__ - ERROR - Failed to process AMZN: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:20,702 - __main__ - INFO - Downloading data for TSLA



📊 Processing TSLA...


2025-05-28 11:15:21,095 - __main__ - INFO - Successfully downloaded 1255 records for TSLA
2025-05-28 11:15:21,107 - __main__ - ERROR - Model training failed: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:21,109 - __main__ - ERROR - Failed to process TSLA: Found input variables with inconsistent numbers of samples: [1245, 249]
2025-05-28 11:15:21,111 - __main__ - INFO - Completed predictions for 0/5 stocks



✅ Prediction pipeline completed successfully!
📁 Results saved to: stock_predictions/


Something something etc 