# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import os

# Page-wide display settings: tab title, wide layout, sidebar initially expanded
page_settings = {
    "page_title": "ML Model Prediction App",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Load the saved model
@st.cache_resource
def load_model(model_path: str = 'best_model_pipeline.joblib'):
    """Load the serialized model pipeline from disk.

    The result is cached through ``st.cache_resource``.

    Args:
        model_path: Path to the joblib artifact. A relative path is resolved
            against the process's current working directory. Defaults to the
            file name this app has always used.

    Returns:
        The object stored in the joblib file; the rest of the app calls
        ``predict`` (and, when available, ``predict_proba``) on it.

    Raises:
        Whatever ``joblib.load`` raises, e.g. ``FileNotFoundError`` when
        ``model_path`` does not exist.
    """
    # SECURITY: joblib.load is pickle-based and can execute arbitrary code
    # while deserializing. Only point this at artifacts you produced or trust.
    return joblib.load(model_path)

# Load the model before rendering anything that depends on it
try:
    model = load_model()
except FileNotFoundError as exc:
    # Show a readable message instead of a raw traceback, then end this run:
    # every later section of the page needs the model.
    st.error(f"Could not load the model: {exc}")
    st.stop()

# Title and description
st.title("Machine Learning Prediction App")
st.write("Enter the features below and get predictions from the trained model.")

# Feature names are placeholders: swap in the columns your model was trained on
def get_feature_names():
    """Return the names of the raw input features, in order.

    The values below are example names only. Replace them with the raw
    (pre-preprocessing) feature names that the loaded model expects.

    Returns:
        A new list of feature-name strings on every call.
    """
    placeholder_names = ("feature1", "feature2", "feature3", "feature4")
    return list(placeholder_names)


features = get_feature_names()

# Input form: one numeric field per feature, plus a "Predict" button
with st.form("prediction_form"):
    st.subheader("Input Features")

    # Two side-by-side columns for a more compact layout
    left_col, right_col = st.columns(2)

    # Alternate fields between the columns: even positions left, odd right
    input_data = {}
    for position, feature in enumerate(features):
        target_col = right_col if position % 2 else left_col
        with target_col:
            input_data[feature] = st.number_input(f"{feature}", value=0.0)

    # Submit button
    submit_button = st.form_submit_button("Predict")

# Make prediction when form is submitted
if submit_button:
    # Single-row frame whose columns are the feature names from the form
    input_df = pd.DataFrame([input_data])

    # Make prediction
    prediction = model.predict(input_df)

    # Show prediction results
    st.subheader("Prediction Results")
    st.write(f"Predicted Class: {prediction[0]}")

    # Probabilities are optional because not every model offers predict_proba.
    # Only the "not supported" errors are treated as "unavailable"; a bare
    # except would also trap KeyboardInterrupt/SystemExit and hide real bugs.
    try:
        probabilities = model.predict_proba(input_df)
    except (AttributeError, NotImplementedError):
        st.write("Probability scores not available for this model.")
    else:
        n_classes = len(probabilities[0])

        # Label columns with the model's own class labels so they line up
        # with the "Predicted Class" shown above; fall back to positional
        # indices when the labels are missing or do not match the column count.
        class_labels = getattr(model, "classes_", None)
        if class_labels is None or len(class_labels) != n_classes:
            class_labels = range(n_classes)

        st.write("Class Probabilities:")
        prob_df = pd.DataFrame(
            probabilities,
            columns=[f"Class {label}" for label in class_labels],
        )
        st.dataframe(prob_df)

# Sidebar "About" panel with a short description of the app and its model
about_text = (
    "\n"
    "    This app uses a machine learning model to make predictions based on the input features.\n"
    "    \n"
    "    The model is a Random Forest Classifier with balanced subsample class weights.\n"
    "    "
)
st.sidebar.header("About")
st.sidebar.info(about_text)

# Optional: batch prediction from an uploaded CSV file
st.sidebar.header("Batch Prediction")
uploaded_file = st.sidebar.file_uploader("Upload CSV for batch prediction", type=["csv"])

if uploaded_file is not None:
    # An empty, malformed, or wrongly encoded upload should produce a
    # message on the page rather than an unhandled traceback.
    try:
        test_data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        test_data = None
        st.error(f"Could not read the uploaded CSV: {exc}")

    if test_data is not None:
        # Show the data
        st.subheader("Uploaded Data")
        st.dataframe(test_data)

        # Make batch predictions on button click
        if st.sidebar.button("Run Batch Prediction"):
            # Report missing columns in feature order so the message is stable
            missing_cols = [col for col in features if col not in test_data.columns]
            if missing_cols:
                st.error(f"Missing columns in uploaded data: {missing_cols}")
            elif test_data.empty:
                st.error("Uploaded data contains no rows to predict on.")
            else:
                # Pass only the expected features, in the expected order, so
                # extra or reordered CSV columns never reach the model. This
                # matches the single-prediction path, which sends exactly
                # the columns listed in `features`.
                batch_predictions = model.predict(test_data[features])

                # Keep every uploaded column in the output and append the result
                results = test_data.copy()
                results["Prediction"] = batch_predictions

                # Display results
                st.subheader("Prediction Results")
                st.dataframe(results)

                # Option to download results
                csv = results.to_csv(index=False)
                st.download_button(
                    label="Download Results as CSV",
                    data=csv,
                    file_name="prediction_results.csv",
                    mime="text/csv"
                )