# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, accuracy_score
import joblib
import warnings
warnings.filterwarnings('ignore')

class StockPredictorTrainer:
    """Train, hold and persist per-symbol stock prediction models.

    For each symbol a ``RandomForestRegressor`` (future close price) and a
    ``RandomForestClassifier`` (up/down direction) are fitted on technical
    indicators derived from price/volume history, together with the
    ``StandardScaler`` that was fitted on the training features.
    """

    def __init__(self):
        # The three dicts below are keyed by ticker symbol.
        self.regression_models = {}
        self.classification_models = {}
        self.scalers = {}
        # Feature column order produced by the most recent prepare_data() call.
        self.feature_columns = []

    def create_features(self, df):
        """Return a copy of *df* extended with technical-indicator columns.

        Args:
            df: DataFrame containing at least ``Close``, ``High``, ``Low``
                and ``Volume`` columns.

        Returns:
            A new DataFrame with the indicator columns appended. The frame
            passed in by the caller is not modified.
        """
        # Work on a copy so adding columns is not a side effect on the caller.
        df = df.copy()

        close = df['Close']
        volume = df['Volume']

        # Price-based features
        df['returns'] = close.pct_change()
        df['volatility'] = df['returns'].rolling(window=10).std()

        # Moving averages
        for window in (5, 10, 20):
            df[f'ma_{window}'] = close.rolling(window=window).mean()

        # RSI over 14 periods, using simple rolling means of gains and losses
        delta = close.diff()
        gain = delta.where(delta > 0, 0).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / loss
        df['rsi'] = 100 - (100 / (1 + rs))

        # MACD (12/26 EMA difference) and its 9-period signal line
        ema_fast = close.ewm(span=12).mean()
        ema_slow = close.ewm(span=26).mean()
        df['macd'] = ema_fast - ema_slow
        df['macd_signal'] = df['macd'].ewm(span=9).mean()

        # Bollinger Bands (20-period mean +/- 2 standard deviations)
        df['bb_middle'] = close.rolling(window=20).mean()
        bb_std = close.rolling(window=20).std()
        df['bb_upper'] = df['bb_middle'] + (bb_std * 2)
        df['bb_lower'] = df['bb_middle'] - (bb_std * 2)
        df['bb_position'] = (close - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'])

        # Volume features
        df['volume_ma'] = volume.rolling(window=10).mean()
        df['volume_ratio'] = volume / df['volume_ma']

        # Price position features
        df['high_low_ratio'] = df['High'] / df['Low']
        df['close_to_high'] = close / df['High']
        df['close_to_low'] = close / df['Low']

        return df

    def prepare_data(self, df, prediction_days=1):
        """Build the feature matrix and both targets from raw price history.

        Args:
            df: DataFrame with ``Close``, ``High``, ``Low`` and ``Volume``
                columns. It is not modified.
            prediction_days: How many rows ahead the target close price lies.
                Must be at least 1.

        Returns:
            Tuple ``(X, y_reg, y_clf, df)``: the feature frame, the future
            close price, the 0/1 direction label (1 when the future price is
            above the current close), and the full prepared frame. Rows with
            missing or infinite values are excluded from all four.

        Raises:
            ValueError: If ``prediction_days`` is smaller than 1.
        """
        # A horizon of 0 or less would make the "future" price the current or
        # a past close, leaking the answer into the target.
        if prediction_days < 1:
            raise ValueError("prediction_days must be at least 1")

        df = self.create_features(df)

        feature_cols = ['returns', 'volatility', 'ma_5', 'ma_10', 'ma_20', 'rsi',
                        'macd', 'macd_signal', 'bb_position', 'volume_ratio',
                        'high_low_ratio', 'close_to_high', 'close_to_low']

        # Lagged close/volume for the previous five rows
        for lag in range(1, 6):
            df[f'close_lag_{lag}'] = df['Close'].shift(lag)
            df[f'volume_lag_{lag}'] = df['Volume'].shift(lag)
            feature_cols.extend([f'close_lag_{lag}', f'volume_lag_{lag}'])

        self.feature_columns = feature_cols

        # Targets
        df['future_price'] = df['Close'].shift(-prediction_days)
        df['price_direction'] = (df['future_price'] > df['Close']).astype(int)

        # Ratio features divide by prices/volumes, so a zero denominator
        # produces +/-inf. dropna() alone would keep those rows, and the
        # scaler/models reject infinite input, so treat them as missing.
        df = df.replace([np.inf, -np.inf], np.nan).dropna()

        X = df[feature_cols]
        y_reg = df['future_price']
        y_clf = df['price_direction']

        return X, y_reg, y_clf, df

    def train_model(self, symbol, prediction_days=1, period="2y", test_size=0.2):
        """Fetch history for *symbol* and train its regression and classification models.

        Args:
            symbol: Ticker symbol passed to ``yf.Ticker``.
            prediction_days: Prediction horizon forwarded to ``prepare_data``.
            period: History period string passed to ``Ticker.history``.
            test_size: Fraction of the (chronologically last) rows held out
                for evaluation.

        Returns:
            Tuple ``(success, message)``. On success the fitted models and
            scaler are stored under *symbol* and the message reports the
            test-set MAE and direction accuracy. Any exception is caught and
            reported through the message with ``success`` set to ``False``.
        """
        try:
            # Fetch data
            history = yf.Ticker(symbol).history(period=period)

            if history.empty:
                return False, f"No data found for {symbol}"

            # Prepare data
            X, y_reg, y_clf, _ = self.prepare_data(history, prediction_days)

            if len(X) < 50:
                return False, f"Insufficient data for {symbol}"

            # Chronological split: shuffle=False keeps the most recent rows
            # as the test set, so no random_state is needed.
            X_train, X_test, y_reg_train, y_reg_test, y_clf_train, y_clf_test = train_test_split(
                X, y_reg, y_clf, test_size=test_size, shuffle=False
            )

            # Fit the scaler on training rows only, then apply it to both sets
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            # Regression model (price prediction)
            reg_model = RandomForestRegressor(n_estimators=100, random_state=42)
            reg_model.fit(X_train_scaled, y_reg_train)

            # Classification model (direction prediction)
            clf_model = RandomForestClassifier(n_estimators=100, random_state=42)
            clf_model.fit(X_train_scaled, y_clf_train)

            # Evaluate on the held-out rows
            mae = mean_absolute_error(y_reg_test, reg_model.predict(X_test_scaled))
            accuracy = accuracy_score(y_clf_test, clf_model.predict(X_test_scaled))

            # Store models and scaler
            self.regression_models[symbol] = reg_model
            self.classification_models[symbol] = clf_model
            self.scalers[symbol] = scaler

            return True, f"Model trained successfully. MAE: {mae:.2f}, Direction Accuracy: {accuracy:.2%}"

        except Exception as e:
            # UI boundary: report the failure instead of crashing the page.
            return False, f"Error training model for {symbol}: {str(e)}"

    def save_models(self, filename):
        """Dump all models, scalers and the feature column list to *filename*.

        Args:
            filename: Destination path handed to ``joblib.dump``.

        Returns:
            A confirmation message naming the file.
        """
        model_data = {
            'regression_models': self.regression_models,
            'classification_models': self.classification_models,
            'scalers': self.scalers,
            'feature_columns': self.feature_columns
        }
        joblib.dump(model_data, filename)
        return f"Models saved to {filename}"

    def get_training_summary(self):
        """Return a text summary listing the symbols that have trained models."""
        if not self.regression_models:
            return "No models trained yet."

        header = f"Trained models for {len(self.regression_models)} symbols:\n"
        return header + "".join(f"- {symbol}\n" for symbol in self.regression_models)

def _parse_symbols(raw):
    """Split a comma-separated string into unique, upper-cased ticker symbols.

    Blank entries (e.g. from a trailing or doubled comma) are dropped and
    duplicates are removed while keeping first-seen order.
    """
    cleaned = (part.strip().upper() for part in raw.split(','))
    return list(dict.fromkeys(sym for sym in cleaned if sym))


def _supported_training_options(train_fn, **options):
    """Return the subset of *options* that *train_fn* declares as parameters."""
    import inspect

    accepted = inspect.signature(train_fn).parameters
    return {name: value for name, value in options.items() if name in accepted}


def main():
    """Render the Streamlit page for training and saving stock prediction models.

    The trainer instance is kept in ``st.session_state`` so trained models
    survive Streamlit reruns within the same browser session.
    """
    st.set_page_config(page_title="Stock Model Training", layout="wide")

    st.title("🏋️ Stock Market Model Training")
    st.markdown("*Train and Save Prediction Models*")

    # Initialize trainer
    if 'trainer' not in st.session_state:
        st.session_state.trainer = StockPredictorTrainer()
    trainer = st.session_state.trainer

    # Sidebar for model management
    st.sidebar.header("Model Management")

    # Save models
    if st.sidebar.button("💾 Save Trained Models"):
        if trainer.regression_models:
            filename = "stock_models.pkl"
            message = trainer.save_models(filename)
            st.sidebar.success(message)
        else:
            st.sidebar.warning("No models to save. Train some models first!")

    # Display current models. A placeholder is used because the sidebar is
    # drawn before training runs further down; it is refreshed afterwards so
    # the list does not stay stale until the next rerun.
    st.sidebar.subheader("Current Session")
    summary_slot = st.sidebar.empty()
    summary_slot.text(trainer.get_training_summary())

    # Main training interface
    st.header("Train Prediction Models")

    col1, col2 = st.columns(2)
    with col1:
        symbols_input = st.text_input(
            "Stock Symbols (comma-separated)",
            value="AAPL,GOOGL,MSFT,TSLA,AMZN",
            help="Enter stock symbols separated by commas"
        )

    with col2:
        prediction_days = st.selectbox(
            "Prediction Horizon (days)",
            options=[1, 2, 3, 5, 7, 10],
            index=0,
            help="How many days ahead to predict"
        )

    # Advanced options
    with st.expander("Advanced Training Options"):
        col1, col2 = st.columns(2)
        with col1:
            data_period = st.selectbox(
                "Training Data Period",
                options=["1y", "2y", "5y", "max"],
                index=1,
                help="How much historical data to use for training"
            )
        with col2:
            test_size = st.slider(
                "Test Set Size",
                min_value=0.1,
                max_value=0.4,
                value=0.2,
                step=0.05,
                help="Proportion of data for testing"
            )

    if st.button("🚀 Start Training", type="primary"):
        symbols = _parse_symbols(symbols_input)

        if not symbols:
            st.error("Please enter at least one stock symbol")
        else:
            # Forward the advanced options only if this trainer's train_model
            # accepts them. The instance in session_state can predate the
            # current class definition, so inspect the bound method itself.
            extra_options = _supported_training_options(
                trainer.train_model, period=data_period, test_size=test_size
            )
            if len(extra_options) < 2:
                st.caption(
                    "Some advanced training options are not supported by the "
                    "current trainer and were ignored."
                )

            progress_bar = st.progress(0)
            status_text = st.empty()
            results_container = st.container()

            training_results = []
            success_count = 0

            for i, symbol in enumerate(symbols):
                status_text.text(f"Training model for {symbol}... ({i+1}/{len(symbols)})")

                success, message = trainer.train_model(symbol, prediction_days, **extra_options)
                if success:
                    success_count += 1

                training_results.append({
                    'Symbol': symbol,
                    'Status': '✅ Success' if success else '❌ Failed',
                    'Details': message
                })

                with results_container:
                    if success:
                        st.success(f"✅ {symbol}: {message}")
                    else:
                        st.error(f"❌ {symbol}: {message}")

                progress_bar.progress((i + 1) / len(symbols))

            status_text.text("Training completed!")

            # Refresh the sidebar list now that new models may exist
            summary_slot.text(trainer.get_training_summary())

            # Summary table
            st.subheader("Training Summary")
            results_df = pd.DataFrame(training_results)
            st.dataframe(results_df, use_container_width=True)

            # Success rate
            success_rate = (success_count / len(training_results)) * 100

            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Models", len(training_results))
            with col2:
                st.metric("Successful", success_count)
            with col3:
                st.metric("Success Rate", f"{success_rate:.1f}%")

            # Reminder to save
            if success_count > 0:
                st.info("💡 Don't forget to save your trained models using the sidebar button!")

    # Model Information Section
    st.markdown("---")
    st.subheader("📊 Model Information")

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        **Hybrid Approach:**
        - 🎯 **Regression Model**: Predicts actual stock prices
        - 📈 **Classification Model**: Predicts price direction (up/down)
        - 🔀 **Combined Strategy**: Uses direction confidence to adjust predictions
        """)

    with col2:
        st.markdown("""
        **Features Used:**
        - 📊 Technical indicators (RSI, MACD, Bollinger Bands)
        - 📉 Moving averages (5, 10, 20-day)
        - 🌊 Volatility measures
        - 📦 Volume analysis
        - 🔄 Lagged price/volume features
        """)

    # Footer
    st.markdown("---")
    st.markdown(
        """
        **Next Step**: After training, use the **Prediction Notebook** to load these models and make predictions.

        **Note**: Models are saved as `stock_models.pkl` in the current directory.
        """
    )

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()