In [None]:
import streamlit as st
import pandas as pd
import altair as alt
from fpdf import FPDF
from io import BytesIO
from crawler import crawl_website

# Page-level configuration followed by the three top-level tabs of the app.
st.set_page_config(layout="wide", page_title="IR Web Crawler")
tab_labels = ["📌 Project Overview", "🔍 Analysis Results", "📤 Download Report"]
tab1, tab2, tab3 = st.tabs(tab_labels)

# Crawl output shared between the tabs below; it starts empty and is filled
# in by the analysis tab when a crawl is started.
results = []

with tab1:
    # Static landing tab: project title, author credit and a short description.
    st.title("IR Project: Intelligent Web Crawler & Analyzer")
    st.write("👤 Omar Ehab 232125 - UI & Report Designer")
    st.markdown("### Description")
    overview_text = (
        "This app allows you to crawl a website, analyze how many links each page contains, "
        "visualize the data, and download the results as CSV or PDF."
    )
    st.write(overview_text)

with tab2:
    # Analysis tab: collect crawl parameters, run the crawl on demand, then
    # list every crawled page and chart the link counts.
    st.subheader("Analysis Results")
    start_url = st.text_input("Enter a URL to crawl", value="https://example.com")
    max_pages = st.slider("Max pages to crawl", 5, 50, 10)

    # Streamlit re-executes the whole script on every widget interaction and
    # st.button() is True only for the run triggered by the click. Persist the
    # crawl output in st.session_state so it survives later reruns (moving the
    # slider, clicking a download button) instead of being reset to [].
    if st.button("Start Crawling"):
        with st.spinner("Crawling in progress..."):
            st.session_state["crawl_results"] = crawl_website(start_url, max_pages=max_pages)
        st.success(f"✅ Crawled {len(st.session_state['crawl_results'])} pages.")

    # Rebind the module-level name so the export tab further down sees the
    # persisted results as well.
    results = st.session_state.get("crawl_results", [])

    # Filter valid results for visualization and export: entries that carry
    # an "error" key are reported below but kept out of the chart.
    clean_results = [r for r in results if "error" not in r]
    df = pd.DataFrame(clean_results)

    # Per-page summary; rendered outside the button branch so it stays
    # visible on reruns that were not triggered by the crawl button.
    for res in results:
        if "error" in res:
            st.error(f"{res['url']} - ❌ {res['error']}")
        else:
            st.write(f"🔗 [{res['title']}]({res['url']}) - {res['num_links']} links found")

    # Bar chart of links per page, sorted by descending link count.
    if not df.empty:
        st.markdown("### 📊 Link Distribution")
        chart = (
            alt.Chart(df)
            .mark_bar()
            .encode(
                x=alt.X("title", sort="-y", title="Page Title"),
                y=alt.Y("num_links", title="Number of Links"),
                tooltip=["title", "num_links", "url"],
            )
            .properties(width=700)
        )
        st.altair_chart(chart, use_container_width=True)

with tab3:
    # Export tab: offers the successfully crawled pages as CSV and PDF.
    st.subheader("📥 Export Results")

    if results:
        # Only entries without an "error" key are exported.
        clean_results = [r for r in results if "error" not in r]
        df = pd.DataFrame(clean_results)

        # Download as CSV
        csv = df.to_csv(index=False).encode("utf-8")
        st.download_button("⬇ Download CSV", data=csv, file_name="crawl_results.csv", mime="text/csv")

        # Generate PDF
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", "B", 16)
        pdf.cell(0, 10, "Web Crawl Report", ln=True, align="C")
        pdf.ln(10)

        pdf.set_font("Arial", size=12)
        for r in clean_results:
            line = f"{r['title']} ({r['url']}) - {r['num_links']} links"
            # The built-in Arial font is limited to Latin-1; crawled titles can
            # contain arbitrary Unicode, which would otherwise make PDF
            # generation raise. Unsupported characters are replaced with "?".
            safe_line = line.encode("latin-1", "replace").decode("latin-1")
            pdf.multi_cell(0, 10, safe_line, border=0)
            pdf.ln(1)

        # Export PDF to bytes. Depending on the installed fpdf package the
        # in-memory output is either a str (legacy PyFPDF) or a
        # bytes/bytearray (fpdf2), so normalise both to bytes.
        raw_pdf = pdf.output(dest="S")
        if isinstance(raw_pdf, str):
            pdf_bytes = raw_pdf.encode("latin-1")
        else:
            pdf_bytes = bytes(raw_pdf)
        st.download_button(
            "🧾 Download PDF",
            data=pdf_bytes,
            file_name="crawl_report.pdf",
            mime="application/pdf",
        )

    else:
        st.info("🔍 Run a crawl first to enable downloads.")

ModuleNotFoundError: No module named 'streamlit'