# Lab 4.4.7: Streamlit Dashboard - SOLUTION

**Module:** 4.4 - Containerization & Cloud Deployment  
**This is the complete solution notebook with all exercises solved.**

---

## Exercise 1 Solution: Complete Multi-Page Dashboard

In [None]:
# Complete production Streamlit dashboard

streamlit_dashboard = '''
"""Production Streamlit Dashboard for ML Models.

Features:
- Multi-page layout
- Chat playground
- Performance metrics
- Model comparison
- GPU monitoring (readings cached for up to 60 seconds)

Run with: streamlit run dashboard.py
"""

import os
import time
import json
from datetime import datetime, timedelta
from typing import List, Dict, Any

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests

# ============================================
# Page Configuration
# ============================================

st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="ML Model Dashboard",
    page_icon="",
)

# ============================================
# Session State Initialization
# ============================================

# Each entry maps a session key to a factory that builds its initial value.
# Factories (rather than literal values) guarantee a fresh container is
# created whenever a key has to be initialised.
_SESSION_DEFAULTS = {
    "messages": list,
    "total_requests": int,
    "request_history": list,
    "model_metrics": lambda: {"latencies": [], "tokens": [], "timestamps": []},
}

# Only fill in keys that are missing so existing session data survives reruns.
for _state_key, _make_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# ============================================
# Helper Functions
# ============================================

@st.cache_data(ttl=60)
def get_gpu_metrics() -> Dict[str, Any]:
    """Return memory, utilization and temperature for GPU index 0.

    The result is cached for 60 seconds, so callers may see readings that are
    up to a minute old.

    Returns:
        Dict with the keys "memory_used_gb", "memory_total_gb",
        "memory_percent", "gpu_utilization" and "temperature". When pynvml
        cannot be imported or any NVML call fails, fixed demo values with the
        same keys are returned instead.
    """
    try:
        import pynvml

        pynvml.nvmlInit()
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(0)

            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
        finally:
            # Release NVML even when one of the queries above raises;
            # otherwise a failed query would leave the library initialised.
            pynvml.nvmlShutdown()

        return {
            "memory_used_gb": mem_info.used / (1024**3),
            "memory_total_gb": mem_info.total / (1024**3),
            "memory_percent": (mem_info.used / mem_info.total) * 100,
            "gpu_utilization": util.gpu,
            "temperature": temp,
        }
    except Exception:
        # Deliberate best-effort fallback (no pynvml, no driver, no GPU).
        # "Exception" rather than a bare except so KeyboardInterrupt and
        # SystemExit are never swallowed.
        # Mock data for demo
        return {
            "memory_used_gb": 45.2,
            "memory_total_gb": 128.0,
            "memory_percent": 35.3,
            "gpu_utilization": 42,
            "temperature": 52,
        }


def get_llm_response(prompt: str, model: str = "qwen3:8b") -> Dict[str, Any]:
    """Send a single-turn, non-streaming chat request to the local LLM API.

    Args:
        prompt: User text. It is sent as the only message in the request;
            earlier chat history is not included.
        model: Model tag forwarded in the request body.

    Returns:
        Dict with "content" (the reply text), "latency_ms" (elapsed time of
        the call in milliseconds) and "tokens" (the response's "eval_count"
        when present, otherwise a whitespace word count of the reply).
        If the request fails or the payload is not in the expected shape, a
        "[Mock Response]" placeholder with the same keys is returned.
    """
    # perf_counter is monotonic, so the measured latency cannot be distorted
    # by system clock adjustments during the request.
    start_time = time.perf_counter()

    try:
        response = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
            },
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()
        content = data["message"]["content"]

        return {
            "content": content,
            "latency_ms": (time.perf_counter() - start_time) * 1000,
            "tokens": data.get("eval_count", len(content.split())),
        }
    except Exception:
        # Deliberate best-effort fallback so the dashboard still works without
        # a running model server. "Exception" rather than a bare except so
        # KeyboardInterrupt and SystemExit still propagate.
        # Mock response
        time.sleep(0.5)
        return {
            "content": f"[Mock Response] You asked: {prompt}",
            "latency_ms": (time.perf_counter() - start_time) * 1000,
            "tokens": 20,
        }


def update_metrics(latency_ms: float, tokens: int):
    """Record one completed request in the session-level metrics.

    Bumps the request counter, appends the latency, token count and current
    timestamp to their series, then trims every series to its newest 100
    entries.
    """
    st.session_state.total_requests += 1

    metrics = st.session_state.model_metrics
    metrics["latencies"].append(latency_ms)
    metrics["tokens"].append(tokens)
    metrics["timestamps"].append(datetime.now())

    # Bound memory use: keep only the most recent 100 samples per series.
    for series_name in metrics:
        recent = metrics[series_name][-100:]
        metrics[series_name] = recent


# ============================================
# Sidebar Navigation
# ============================================

_sidebar = st.sidebar

_sidebar.title("Navigation")
page = _sidebar.selectbox(
    "Select Page",
    ["Chat Playground", "Metrics Dashboard", "Model Comparison", "Settings"],
)

_sidebar.markdown("---")

# GPU status panel: memory (text + bar), utilization and temperature
_sidebar.subheader("GPU Status")
gpu = get_gpu_metrics()
_used_gb = gpu["memory_used_gb"]
_total_gb = gpu["memory_total_gb"]
_sidebar.metric("Memory", f"{_used_gb:.1f} / {_total_gb:.0f} GB")
# memory_percent is a percentage; divide by 100 to get the bar's fraction
_sidebar.progress(gpu["memory_percent"] / 100)
_gpu_util = gpu["gpu_utilization"]
_sidebar.metric("GPU Util", f"{_gpu_util}%")
_gpu_temp = gpu["temperature"]
_sidebar.metric("Temperature", f"{_gpu_temp}C")

# ============================================
# Page: Chat Playground
# ============================================

if page == "Chat Playground":
    st.title("Chat Playground")

    def _stats_line(latency_ms, token_count):
        """Format the latency/token caption shown under assistant replies."""
        return f"Latency: {latency_ms:.0f}ms | Tokens: {token_count}"

    # Model picker (wide column) next to the reset button (narrow column)
    picker_col, reset_col = st.columns([3, 1])
    with picker_col:
        model = st.selectbox(
            "Model",
            ["qwen3:8b", "qwen3:32b", "codellama:7b", "mistral:7b"],
        )
    with reset_col:
        if st.button("Clear Chat"):
            st.session_state.messages = []
            st.rerun()

    # Replay the stored conversation
    for entry in st.session_state.messages:
        with st.chat_message(entry["role"]):
            st.write(entry["content"])
            has_stats = entry["role"] == "assistant" and "latency" in entry
            if has_stats:
                st.caption(_stats_line(entry["latency"], entry["tokens"]))

    # Handle a newly submitted prompt, if any
    prompt = st.chat_input("Ask something...")
    if prompt:
        # Record and echo the user's turn
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

        # Query the model and show its reply with timing stats
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                reply = get_llm_response(prompt, model)

            st.write(reply["content"])
            st.caption(_stats_line(reply["latency_ms"], reply["tokens"]))

        # Persist the assistant turn and feed the metrics page
        st.session_state.messages.append(
            dict(
                role="assistant",
                content=reply["content"],
                latency=reply["latency_ms"],
                tokens=reply["tokens"],
            )
        )
        update_metrics(reply["latency_ms"], reply["tokens"])

# ============================================
# Page: Metrics Dashboard
# ============================================

elif page == "Metrics Dashboard":
    st.title("Performance Metrics")
    
    # Top row: Key metrics
    col1, col2, col3, col4 = st.columns(4)
    
    latencies = st.session_state.model_metrics["latencies"]
    tokens = st.session_state.model_metrics["tokens"]
    
    with col1:
        st.metric("Total Requests", st.session_state.total_requests)
    with col2:
        avg_latency = sum(latencies) / len(latencies) if latencies else 0
        st.metric("Avg Latency", f"{avg_latency:.0f}ms")
    with col3:
        total_tokens = sum(tokens)
        st.metric("Total Tokens", f"{total_tokens:,}")
    with col4:
        tokens_per_sec = total_tokens / (sum(latencies) / 1000) if latencies else 0
        st.metric("Tokens/sec", f"{tokens_per_sec:.1f}")
    
    st.markdown("---")
    
    # Charts
    if latencies:
        col1, col2 = st.columns(2)
        
        with col1:
            st.subheader("Latency Over Time")
            df = pd.DataFrame({
                "Request": range(1, len(latencies) + 1),
                "Latency (ms)": latencies,
            })
            fig = px.line(df, x="Request", y="Latency (ms)")
            st.plotly_chart(fig, use_container_width=True)
        
        with col2:
            st.subheader("Tokens per Request")
            df = pd.DataFrame({
                "Request": range(1, len(tokens) + 1),
                "Tokens": tokens,
            })
            fig = px.bar(df, x="Request", y="Tokens")
            st.plotly_chart(fig, use_container_width=True)
        
        # Latency distribution
        st.subheader("Latency Distribution")
        fig = px.histogram(latencies, nbins=20, labels={"value": "Latency (ms)"})
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("No metrics yet. Start chatting to generate data!")

# ============================================
# Page: Model Comparison
# ============================================

elif page == "Model Comparison":
    st.title("Model Comparison")
    
    col1, col2 = st.columns(2)
    with col1:
        model_a = st.selectbox("Model A", ["qwen3:8b", "mistral:7b"], key="model_a")
    with col2:
        model_b = st.selectbox("Model B", ["mistral:7b", "qwen3:8b"], key="model_b")
    
    prompt = st.text_area("Prompt", "Explain quantum computing in simple terms.")
    
    if st.button("Compare", type="primary"):
        col1, col2 = st.columns(2)
        
        with col1:
            st.subheader(f"{model_a}")
            with st.spinner("Generating..."):
                result_a = get_llm_response(prompt, model_a)
            st.write(result_a["content"])
            st.metric("Latency", f"{result_a['latency_ms']:.0f}ms")
            st.metric("Tokens", result_a["tokens"])
        
        with col2:
            st.subheader(f"{model_b}")
            with st.spinner("Generating..."):
                result_b = get_llm_response(prompt, model_b)
            st.write(result_b["content"])
            st.metric("Latency", f"{result_b['latency_ms']:.0f}ms")
            st.metric("Tokens", result_b["tokens"])

# ============================================
# Page: Settings
# ============================================

elif page == "Settings":
    st.title("Settings")
    
    st.subheader("Model Configuration")
    
    temperature = st.slider("Temperature", 0.0, 2.0, 0.7, 0.1)
    max_tokens = st.number_input("Max Tokens", 100, 4096, 512)
    top_p = st.slider("Top P", 0.0, 1.0, 0.9, 0.05)
    
    st.subheader("System Prompt")
    system_prompt = st.text_area(
        "System Prompt",
        "You are a helpful AI assistant.",
        height=100,
    )
    
    if st.button("Save Settings"):
        st.success("Settings saved!")
    
    st.markdown("---")
    st.subheader("About")
    st.markdown("""
    **ML Model Dashboard**
    
    Built with Streamlit for the DGX Spark AI Curriculum.
    
    Features:
    - Real-time chat interface
    - Performance metrics
    - Model comparison
    - GPU monitoring
    """)

# ============================================
# Footer
# ============================================

# Stamp the sidebar with the time of this script run (HH:MM:SS)
_rendered_at = datetime.now().strftime("%H:%M:%S")
st.sidebar.markdown("---")
st.sidebar.caption(f"Last updated: {_rendered_at}")
'''

import os

# Echo the generated dashboard source into the notebook output
for _line in ("COMPLETE STREAMLIT DASHBOARD:", "=" * 60, streamlit_dashboard):
    print(_line)

# Write the dashboard next to the other example apps, creating the
# directory on first run
dashboard_path = "../app-examples/streamlit_dashboard.py"
os.makedirs("../app-examples", exist_ok=True)
with open(dashboard_path, "w") as dashboard_file:
    dashboard_file.write(streamlit_dashboard)
print("\nSaved to: ../app-examples/streamlit_dashboard.py")

## Exercise 2 Solution: Deployment to Streamlit Cloud

In [None]:
# Streamlit Cloud deployment files

# requirements.txt
# Packages the dashboard needs. The environment marker on the last line
# restricts the NVML bindings to Linux hosts.
requirements = """
streamlit>=1.30.0
pandas>=2.0.0
plotly>=5.18.0
requests>=2.31.0
nvidia-ml-py>=12.535.0; platform_system=="Linux"
"""

# Heading and file contents, each on its own line
print("REQUIREMENTS.TXT:", requirements, sep="\n")

# .streamlit/config.toml
# Theme plus server/browser options for the deployed app.
#
# server.enableCORS is kept enabled: Streamlit does not allow CORS protection
# to be disabled while XSRF protection is on. It overrides the setting and
# logs a warning, so "enableCORS = false" would be dead, misleading config.
config_toml = '''
[theme]
primaryColor = "#76b900"  # NVIDIA green
backgroundColor = "#0e1117"
secondaryBackgroundColor = "#262730"
textColor = "#fafafa"

[server]
maxUploadSize = 50
enableXsrfProtection = true
enableCORS = true

[browser]
gatherUsageStats = false
'''

print("\n.STREAMLIT/CONFIG.TOML:")
print(config_toml)

# Save files
import os

# Creating the nested .streamlit directory also creates ../app-examples
os.makedirs("../app-examples/.streamlit", exist_ok=True)

# (destination, contents) pairs, written in this order
_outputs = (
    ("../app-examples/requirements.txt", requirements),
    ("../app-examples/.streamlit/config.toml", config_toml),
)
for _destination, _contents in _outputs:
    with open(_destination, "w") as _handle:
        _handle.write(_contents)

print("\nSaved configuration files!")

---

## Summary

This solution demonstrated:

1. **Multi-Page Streamlit Dashboard**
   - Chat Playground with per-response latency and token stats (non-streaming)
   - Metrics visualization with Plotly
   - Model comparison tool
   - Settings management

2. **Production Features**
   - GPU monitoring integration
   - Session state for persistence
   - Caching for performance
   - Real-time metrics updates

3. **Deployment Configuration**
   - Streamlit Cloud setup
   - Custom theming
   - Requirements management