# Notebook cell export (was the "In [None]:" prompt) -- the cell contents follow.
# app.py

import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Page-level configuration: browser tab title and wide layout.
st.set_page_config(page_title="Customer Segmentation App", layout="wide")

# --- App Title ---
# NOTE: the emoji below were previously stored as mojibake (UTF-8 bytes decoded
# with the wrong codec) and rendered as garbage in the UI; they are restored here.
st.title("🧠 Customer Segmentation using K-Means and PCA")
st.markdown("""
Upload a customer dataset to visualize and segment customers based on purchasing behavior.
The app will automatically:
- Preprocess your data  
- Apply PCA for visualization  
- Perform K-Means clustering  
- Display cluster insights  
""")

# --- File Upload ---
# Returns None until the user supplies a file; only .csv files are accepted.
uploaded_file = st.file_uploader("📂 Upload your CSV file", type=["csv"])

# --- Main workflow: preview -> feature selection -> scaling -> K-Means -> report ---
# Runs on every Streamlit rerun once a CSV has been uploaded.
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An empty or malformed upload should produce a message, not a traceback.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    st.subheader("📋 Preview of Uploaded Data")
    st.dataframe(df.head())

    # --- Column Selection ---
    st.markdown("### 🧩 Select the features to include in clustering")
    all_columns = df.columns.tolist()
    selected_columns = st.multiselect("Choose numeric columns for clustering:", all_columns)

    if selected_columns:
        # One-hot encode any categorical columns, then work in plain floats.
        data = pd.get_dummies(df[selected_columns], drop_first=True).astype(float)

        # PCA and K-Means reject NaN/inf, so cluster only the complete rows.
        data = data.replace([np.inf, -np.inf], np.nan).dropna()
        n_samples, n_features = data.shape

        n_dropped = len(df) - n_samples
        if n_dropped:
            st.info(
                f"{n_dropped} row(s) with missing or non-finite values in the "
                "selected columns were excluded from clustering."
            )

        if n_features == 0:
            # e.g. a single-category column whose only dummy was dropped.
            st.warning("The selected columns produced no usable features. Please choose different columns.")
            st.stop()
        if n_samples < 3:
            # Silhouette needs 2 <= n_clusters <= n_samples - 1.
            st.warning("At least 3 complete rows are required for clustering.")
            st.stop()

        # --- Scaling ---
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(data)

        # --- PCA for 2D Visualization ---
        # n_components may not exceed the feature count, so a single feature
        # yields one component that is plotted against a flat second axis.
        n_components = min(2, n_features)
        X_pca = PCA(n_components=n_components).fit_transform(X_scaled)
        pc1 = X_pca[:, 0]
        pc2 = X_pca[:, 1] if n_components == 2 else np.zeros(n_samples)

        # --- Cluster Selection ---
        st.markdown("### ⚙️ Choose Number of Clusters")
        # Cap K so that both K-Means and the silhouette score stay well defined.
        max_k = min(10, n_samples - 1)
        if max_k > 2:
            k = st.slider(
                "Number of clusters (K)",
                min_value=2,
                max_value=max_k,
                value=min(5, max_k),
                step=1,
            )
        else:
            # A slider needs min < max; with so few rows only K=2 is possible.
            k = 2
            st.info("Only K = 2 is possible with this few rows.")

        # n_init is pinned because its default differs between scikit-learn
        # versions, which would otherwise change results (and emit warnings).
        kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
        y_kmeans = kmeans.fit_predict(X_scaled)

        # Duplicate-heavy data can collapse into fewer clusters than requested;
        # silhouette_score raises unless 2 <= n_labels <= n_samples - 1.
        n_labels = len(np.unique(y_kmeans))
        if 2 <= n_labels <= n_samples - 1:
            silhouette = silhouette_score(X_scaled, y_kmeans)
            st.write(f"**Silhouette Score:** {silhouette:.3f}")
        else:
            st.info("Silhouette score is undefined for this clustering (too few distinct clusters).")

        # Add cluster labels without clobbering a column the upload already has.
        label_col = "Cluster"
        while label_col in df.columns:
            label_col += "_kmeans"
        labels = pd.Series(y_kmeans, index=data.index, dtype="Int64", name="Cluster")
        # Rows excluded above keep their data and get an empty cluster label.
        df[label_col] = labels.reindex(df.index)

        # --- 2D Visualization ---
        st.markdown("### 🎨 Cluster Visualization (PCA Projection)")
        fig, ax = plt.subplots(figsize=(8, 6))
        # Draw on the explicit Axes rather than pyplot's global "current" figure,
        # which is shared across Streamlit sessions.
        sns.scatterplot(x=pc1, y=pc2, hue=y_kmeans, palette='tab10', s=80, ax=ax)
        ax.set_title("Customer Segments (PCA + K-Means)")
        ax.set_xlabel("Principal Component 1")
        ax.set_ylabel(
            "Principal Component 2" if n_components == 2 else "(no second component)"
        )
        ax.legend(title="Cluster")
        st.pyplot(fig)
        plt.close(fig)  # release the figure; otherwise one leaks per rerun

        # --- Cluster Summary ---
        st.markdown("### 📊 Cluster Summary")
        # Summarise the encoded features so categorical selections work too.
        st.dataframe(data.groupby(labels).mean())

        # --- Download Results ---
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button("📥 Download Clustered Data", csv, "clustered_customers.csv", "text/csv")

    else:
        st.warning("⚠️ Please select at least one numeric column for clustering.")

else:
    st.info("👆 Upload a CSV file to get started.")
