# BigQuery Anti-Pattern Recognition - Streamlit Frontend

Simple web interface for BigQuery anti-pattern detection and AI-powered query optimization.

In [None]:
# Notebook configuration: both values are placeholders to replace before use.
PROJECT_ID = "your-project-id"  # Placeholder project ID - set your own
SERVICE_URL = "your-service-url"  # Placeholder service URL - set your own

# Echo the active settings, one per line.
print(
    f"Project ID: {PROJECT_ID}",
    f"Service URL: {SERVICE_URL}",
    sep="\n",
)

In [None]:
# Create Streamlit app
#
# The template below is an f-string. Only SERVICE_URL and PROJECT_ID are
# substituted here in the notebook; every other brace is doubled so that it
# survives as a literal brace in the generated streamlit_app.py source.
streamlit_app_code = f'''
import html
import streamlit as st
import requests
import json
from google.auth import default
from google.auth.transport.requests import Request

# Embedded with repr() so quotes or backslashes in the notebook values
# cannot break the generated source.
SERVICE_URL = {SERVICE_URL!r}
PROJECT_ID = {PROJECT_ID!r}

# Marker shown next to each severity level; unknown levels fall back to yellow.
SEVERITY_EMOJI = {{
    'HIGH': '🔴',
    'MEDIUM': '🟡',
    'LOW': '🟠'
}}

st.set_page_config(
    page_title="BigQuery Anti-Pattern Recognition",
    page_icon="🔍",
    layout="wide"
)

st.markdown("""
<style>
    .main-header {{
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }}
    .antipattern-card {{
        background-color: #ffebee;
        border-left: 4px solid #f44336;
        padding: 1rem;
        border-radius: 0.5rem;
        margin: 0.5rem 0;
        color: #000000 !important;
    }}
    .antipattern-card h5 {{
        color: #000000 !important;
        margin-bottom: 0.5rem;
    }}
    .antipattern-card p {{
        color: #000000 !important;
        margin-bottom: 0;
    }}
    .success-card {{
        background-color: #e8f5e8;
        border-left: 4px solid #4caf50;
        padding: 1rem;
        border-radius: 0.5rem;
        margin: 0.5rem 0;
        color: #000000 !important;
    }}
    .success-card h5 {{
        color: #000000 !important;
        margin-bottom: 0.5rem;
    }}
    .success-card p {{
        color: #000000 !important;
        margin-bottom: 0;
    }}
</style>
""", unsafe_allow_html=True)

def get_auth_token():
    """Return a refreshed token from the default Google credentials.

    Returns None, after showing the error in the UI, if the credentials
    cannot be obtained or refreshed.
    """
    try:
        credentials, _ = default()
        auth_req = Request()
        credentials.refresh(auth_req)
        return credentials.token
    except Exception as e:
        st.error(f"Authentication failed: {{str(e)}}")
        return None

def _post_query(url, query, timeout, action):
    """POST the query to url and return the decoded JSON reply, or None.

    The query is sent as the single row of a 'calls' payload with a bearer
    token. Any failure is reported in the UI as '<action> failed: ...' and
    None is returned, so callers only need a truthiness check.
    """
    token = get_auth_token()
    if not token:
        return None

    headers = {{
        'Authorization': f'Bearer {{token}}',
        'Content-Type': 'application/json'
    }}

    payload = {{'calls': [[query]]}}

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        st.error(f"{{action}} failed: {{str(e)}}")
        return None

def analyze_query(query):
    """Send the query to the service root for anti-pattern analysis."""
    return _post_query(SERVICE_URL, query, 30, "Analysis")

def rewrite_query(query):
    """Send the query to the service's /rewrite endpoint."""
    # Strip a trailing slash so the joined URL never contains '//rewrite'.
    rewrite_url = SERVICE_URL.rstrip('/') + "/rewrite"
    return _post_query(rewrite_url, query, 60, "Rewrite")

st.markdown('<div class="main-header">🔍 BigQuery Anti-Pattern Recognition</div>', unsafe_allow_html=True)

with st.sidebar:
    st.title("⚙️ Configuration")
    st.info(f"**Project:** {{PROJECT_ID}}")
    st.info(f"**Service:** {{SERVICE_URL}}")

    st.title("📝 Sample Queries")
    sample_queries = {{
        "SELECT * Anti-pattern": "SELECT * FROM `bigquery-public-data.samples.shakespeare` ORDER BY word_count DESC LIMIT 10",
        "ORDER BY without LIMIT": "SELECT word, COUNT(*) as word_count FROM `bigquery-public-data.samples.shakespeare` GROUP BY word ORDER BY word_count DESC",
        "REGEXP_CONTAINS misuse": "SELECT word FROM `bigquery-public-data.samples.shakespeare` WHERE REGEXP_CONTAINS(word, '.*love.*') LIMIT 100",
        "Clean query": "SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` WHERE word_count > 100 LIMIT 1000"
    }}

    selected_sample = st.selectbox("Choose a sample:", [""] + list(sample_queries.keys()))
    if selected_sample and st.button("📋 Load Sample"):
        # Seed the keyed text area through session state. The sidebar runs
        # before the text area is created in this same script run, so the
        # sample shows up immediately and persists across later reruns.
        st.session_state.query_input = sample_queries[selected_sample]

st.markdown("### 1. 📝 Enter Your SQL Query")

# The text area is keyed so its content lives in session state. Passing a
# changing value= to an un-keyed widget makes Streamlit treat it as a new
# widget on the following rerun, which wiped a freshly loaded sample.
query_text = st.text_area(
    "SQL Query:",
    key="query_input",
    height=150,
    placeholder="SELECT * FROM `project.dataset.table` WHERE condition ORDER BY column;",
    help="Enter your BigQuery SQL query here"
)

col1, col2, col3 = st.columns([1, 1, 2])

with col1:
    analyze_button = st.button(
        "🔍 Analyze Query",
        type="primary",
        disabled=not query_text.strip()
    )

with col2:
    clear_button = st.button("🗑️ Clear")

if clear_button:
    st.session_state.analysis_results = None
    st.session_state.rewrite_results = None
    st.rerun()

if analyze_button and query_text.strip():
    with st.spinner("🔍 Analyzing query for anti-patterns..."):
        results = analyze_query(query_text.strip())
        if results:
            st.session_state.analysis_results = results
            st.session_state.current_query = query_text.strip()
            # A rewrite belongs to the query it was generated for; drop it so
            # a stale optimized query is never shown beside a new original.
            st.session_state.rewrite_results = None

if hasattr(st.session_state, 'analysis_results') and st.session_state.analysis_results:
    st.markdown("### 2. 📊 Analysis Results")

    results = st.session_state.analysis_results

    # Anti-patterns are read from the first reply, when one is present.
    antipatterns = []
    if 'replies' in results and results['replies']:
        reply = results['replies'][0]
        if 'antipatterns' in reply:
            antipatterns = reply['antipatterns']

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Anti-patterns Found", len(antipatterns))
    with col2:
        # A missing or null severity is simply not counted as HIGH.
        high_severity = sum(1 for ap in antipatterns if str(ap.get('severity') or '').upper() == 'HIGH')
        st.metric("High Severity", high_severity)
    with col3:
        st.metric("Query Length", f"{{len(st.session_state.current_query)}} chars")

    if antipatterns:
        st.markdown("#### 🚨 Anti-patterns Detected")
        for ap in antipatterns:
            # Missing or null severity is displayed as MEDIUM.
            severity = str(ap.get('severity') or 'MEDIUM')
            severity_emoji = SEVERITY_EMOJI.get(severity.upper(), '🟡')

            # These values come from the service reply and are placed into
            # raw HTML below, so they are escaped first.
            name = html.escape(str(ap.get('name', 'Unknown')))
            description = html.escape(str(ap.get('result', ap.get('description', 'No description available'))))
            severity_label = html.escape(severity)

            st.markdown(f"""
            <div class="antipattern-card">
                <h5>{{severity_emoji}} {{name}} ({{severity_label}})</h5>
                <p>{{description}}</p>
            </div>
            """, unsafe_allow_html=True)
    else:
        st.markdown("""
        <div class="success-card">
            <h5>✅ No Anti-patterns Detected</h5>
            <p>Your query looks good! No common anti-patterns were found.</p>
        </div>
        """, unsafe_allow_html=True)

    if antipatterns:
        st.markdown("### 3. 🤖 AI-Powered Query Rewriting")

        col1, col2 = st.columns([1, 3])
        with col1:
            rewrite_button = st.button(
                "🤖 Rewrite Query",
                type="secondary",
                help="Use AI to optimize the query and fix anti-patterns"
            )

        if rewrite_button:
            with st.spinner("🤖 AI is rewriting your query..."):
                rewrite_results = rewrite_query(st.session_state.current_query)
                if rewrite_results:
                    st.session_state.rewrite_results = rewrite_results

        if hasattr(st.session_state, 'rewrite_results') and st.session_state.rewrite_results:
            rewrite_data = st.session_state.rewrite_results

            # The optimized SQL is read from the first reply, when present.
            optimized_sql = None
            if 'replies' in rewrite_data and rewrite_data['replies']:
                reply = rewrite_data['replies'][0]
                optimized_sql = reply.get('optimized_sql')

            if optimized_sql:
                st.markdown("#### 📝 Rewritten Query")

                col1, col2 = st.columns(2)

                with col1:
                    st.markdown("**Original Query:**")
                    st.code(st.session_state.current_query, language='sql')

                with col2:
                    st.markdown("**Optimized Query:**")
                    st.code(optimized_sql, language='sql')

                st.markdown("**Copy Optimized Query:**")
                st.text_area(
                    "Optimized SQL:",
                    value=optimized_sql,
                    height=100,
                    help="Copy this optimized query to use in your application"
                )

                st.success("✅ Query successfully optimized by AI!")
            else:
                st.warning("No optimized query was returned from the AI rewriter.")
                st.info("This might happen if the query is already well-optimized or if the AI couldn't find improvements.")

    with st.expander("🔍 View Raw API Responses"):
        st.markdown("#### 📊 Analysis API Response")
        st.json(results)

        if hasattr(st.session_state, 'rewrite_results') and st.session_state.rewrite_results:
            st.markdown("#### 🤖 AI Rewrite API Response")
            st.json(st.session_state.rewrite_results)
        else:
            st.info("AI Rewrite response will appear here after you click 'Rewrite Query'")

st.markdown("---")
st.markdown("""
<div style="text-align: center; color: #666; padding: 1rem;">
    <p>🔍 BigQuery Anti-Pattern Recognition Tool</p>
    <p>Built with Streamlit • Powered by Google Cloud</p>
</div>
""", unsafe_allow_html=True)
'''

# Write the generated app explicitly as UTF-8: the template contains emoji,
# which a non-UTF-8 platform default encoding (e.g. cp1252 on Windows)
# cannot encode.
with open('streamlit_app.py', 'w', encoding='utf-8') as f:
    f.write(streamlit_app_code)

print("✅ Streamlit app created: streamlit_app.py")

In [None]:
import subprocess


def start_streamlit(port=8501):
    """Run streamlit_app.py with Streamlit and block until the server exits.

    Args:
        port: Port number passed to Streamlit's ``--server.port`` option and
            shown in the printed local URL. Defaults to 8501.

    Returns:
        None. The exit status of the Streamlit process is not checked.
    """
    # Imported locally so this function does not rely on a file-level import.
    import sys

    print("🚀 Starting Streamlit server...")
    print(f"📍 Local URL: http://localhost:{port}")
    # Launch through the current interpreter so the Streamlit installed in
    # this environment is used even when no `streamlit` executable is on PATH.
    subprocess.run(
        [sys.executable, '-m', 'streamlit', 'run', 'streamlit_app.py',
         f'--server.port={port}'])


# Start the server. This call blocks the cell until the Streamlit process
# exits; comment it out to skip launching.
start_streamlit()